diff options
Diffstat (limited to 'coregrind/m_debuginfo/.svn/text-base')
23 files changed, 25271 insertions, 0 deletions
diff --git a/coregrind/m_debuginfo/.svn/text-base/README.txt.svn-base b/coregrind/m_debuginfo/.svn/text-base/README.txt.svn-base new file mode 100644 index 0000000..ea96ba9 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/README.txt.svn-base @@ -0,0 +1,57 @@ + +On 4 Apr 06, the debuginfo reader (m_debuginfo) was majorly cleaned up +and restructured. It has been a bit of a tangle for a while. The new +structure looks like this: + + debuginfo.c + + readelf.c + + readdwarf.c readstabs.c + + storage.c + +Each .c can only call those below it on the page. + +storage.c contains the SegInfo structure and stuff for +maintaining/searching arrays of symbols, line-numbers, and Dwarf CF +info records. + +readdwarf.c and readstabs.c parse the relevant kind of info and +call storage.c to store the results. + +readelf.c reads ELF format, hands syms directly to storage.c, +then delegates to readdwarf.c/readstabs.c for debug info. All +straightforward. + +debuginfo.c is the top-level file, and is quite small. + +There are 3 goals to this: + +(1) Generally tidy up something which needs tidying up + +(2) Introduce more modularity, so as to make it easier to add + readers for other formats, if needed + +(3) Simplify the stabs reader. + +Rationale for (1) and (2) are obvious. + +Re (3), the stabs reader has for a good year contained a sophisticated +and impressive parser for stabs strings, with the aim of recording in +detail the types of variables (I think) (Jeremy's work). Unfortunately +that has caused various segfaults reading stabs info in the past few months +(#77869, #117936, #119914, #120345 and another to do with deeply nested +template types). + +The worst thing is that it is the stabs type reader that is crashing, +not the stabs line-number reader, but the type info is only used by +Helgrind, which is looking pretty dead at the moment. 
So I have lifted +out the type-reader code and put it in UNUSED_STABS.txt for safe +storage, just leaving the line-number reader in place. + +If Helgrind ever does come back to life we will need to reinstate the +type storage/reader stuff but with DWARF as its primary target. +Placing the existing stabs type-reader in hibernation improves +stability whilst retaining the development effort/expertise that went +into it for possible future reinstatement. diff --git a/coregrind/m_debuginfo/.svn/text-base/UNUSED_STABS.txt.svn-base b/coregrind/m_debuginfo/.svn/text-base/UNUSED_STABS.txt.svn-base new file mode 100644 index 0000000..3879026 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/UNUSED_STABS.txt.svn-base @@ -0,0 +1,1483 @@ + +/*--------------------------------------------------------------------*/ +/*--- Extract type info from debug info. symtypes.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2005 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_debuglog.h" // For VG_(debugLog_vprintf) +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcsignal.h" +#include "pub_core_machine.h" +#include "pub_core_mallocfree.h" + +#include "priv_symtypes.h" + +typedef enum { + TyUnknown, /* unknown type */ + TyUnresolved, /* unresolved type */ + TyError, /* error type */ + + TyVoid, /* void */ + + TyInt, /* integer */ + TyBool, /* boolean */ + TyChar, /* character */ + TyFloat, /* float */ + + TyRange, /* type subrange */ + + TyEnum, /* enum */ + + TyPointer, /* pointer */ + TyArray, /* array */ + TyStruct, /* structure/class */ + TyUnion, /* union */ + + TyTypedef /* typedef */ +} TyKind; + +static const Char *ppkind(TyKind k) +{ + switch(k) { +#define S(x) case x: return #x + S(TyUnknown); + S(TyUnresolved); + S(TyError); + S(TyVoid); + S(TyInt); + S(TyBool); + S(TyChar); + S(TyRange); + S(TyFloat); + S(TyEnum); + S(TyPointer); + S(TyArray); + S(TyStruct); + S(TyUnion); + S(TyTypedef); +#undef S + default: + return "Ty???"; + } +} + +/* struct/union field */ +typedef struct _StField { + UInt offset; /* offset into structure (0 for union) (in bits) */ + UInt size; /* size (in bits) */ + SymType *type; /* type of element */ + Char *name; /* name of element */ +} StField; + +/* enum tag */ +typedef struct _EnTag { + const Char *name; /* name */ + UInt val; /* value */ +} EnTag; + +struct _SymType { + TyKind kind; /* type descriminator */ + UInt size; /* sizeof(type) */ + Char *name; /* useful name */ + + union { + /* TyInt,TyBool,TyChar */ + struct { + Bool issigned; /* signed or not */ + } t_scalar; + + /* TyFloat */ + struct { + Bool isdouble; /* is double prec */ + } t_float; + + /* TyRange */ + struct { + Int min; + Int max; + SymType *type; + } t_range; + + /* TyPointer */ + struct { + SymType *type; /* *type */ + } t_pointer; + + /* TyArray */ + struct { + 
SymType *idxtype; + SymType *type; + } t_array; + + /* TyEnum */ + struct { + UInt ntag; /* number of tags */ + EnTag *tags; /* tags */ + } t_enum; + + /* TyStruct, TyUnion */ + struct { + UInt nfield; /* number of fields */ + UInt nfieldalloc; /* number of fields allocated */ + StField *fields; /* fields */ + } t_struct; + + /* TyTypedef */ + struct { + SymType *type; /* type */ + } t_typedef; + + /* TyUnresolved - reference to unresolved type */ + struct { + /* some kind of symtab reference */ + SymResolver *resolver; /* symtab reader's resolver */ + void *data; /* data for resolver */ + } t_unresolved; + } u; +}; + + +Bool ML_(st_isstruct)(SymType *ty) +{ + return ty->kind == TyStruct; +} + +Bool ML_(st_isunion)(SymType *ty) +{ + return ty->kind == TyUnion; +} + +Bool ML_(st_isenum)(SymType *ty) +{ + return ty->kind == TyEnum; +} + +static inline SymType *alloc(SymType *st) +{ + if (st == NULL) { + st = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*st)); + st->kind = TyUnknown; + st->name = NULL; + } + + return st; +} + +static void resolve(SymType *st) +{ + if (st->kind != TyUnresolved) + return; + + (*st->u.t_unresolved.resolver)(st, st->u.t_unresolved.data); + + if (st->kind == TyUnresolved) + st->kind = TyError; +} + +SymType *ML_(st_mkunresolved)(SymType *st, SymResolver *resolver, void *data) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyUnresolved; + st->size = 0; + st->u.t_unresolved.resolver = resolver; + st->u.t_unresolved.data = data; + + return st; +} + +void ML_(st_unresolved_setdata)(SymType *st, SymResolver *resolver, void *data) +{ + if (st->kind != TyUnresolved) + return; + + st->u.t_unresolved.resolver = resolver; + st->u.t_unresolved.data = data; +} + +Bool ML_(st_isresolved)(SymType *st) +{ + return st->kind != TyUnresolved; +} + +void ML_(st_setname)(SymType *st, Char *name) +{ + if (st->name != NULL) + st->name = name; +} + +SymType *ML_(st_mkvoid)(SymType *st) +{ + st = alloc(st); + + 
vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyVoid; + st->size = 1; /* for address calculations */ + st->name = "void"; + return st; +} + +SymType *ML_(st_mkint)(SymType *st, UInt size, Bool isSigned) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyInt; + st->size = size; + st->u.t_scalar.issigned = isSigned; + + return st; +} + +SymType *ML_(st_mkfloat)(SymType *st, UInt size) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyFloat; + st->size = size; + st->u.t_scalar.issigned = True; + + return st; +} + +SymType *ML_(st_mkbool)(SymType *st, UInt size) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyBool; + st->size = size; + + return st; +} + + +SymType *ML_(st_mkpointer)(SymType *st, SymType *ptr) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyPointer; + st->size = sizeof(void *); + st->u.t_pointer.type = ptr; + + return st; +} + +SymType *ML_(st_mkrange)(SymType *st, SymType *ty, Int min, Int max) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyRange; + st->size = 0; /* ? 
*/ + st->u.t_range.type = ty; + st->u.t_range.min = min; + st->u.t_range.max = max; + + return st; +} + +SymType *ML_(st_mkstruct)(SymType *st, UInt size, UInt nfields) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown || st->kind == TyStruct); + + vg_assert(st->kind != TyStruct || st->u.t_struct.nfield == 0); + + st->kind = TyStruct; + st->size = size; + st->u.t_struct.nfield = 0; + st->u.t_struct.nfieldalloc = nfields; + if (nfields != 0) + st->u.t_struct.fields = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(StField) * nfields); + else + st->u.t_struct.fields = NULL; + + return st; +} + +SymType *ML_(st_mkunion)(SymType *st, UInt size, UInt nfields) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown || st->kind == TyUnion); + + vg_assert(st->kind != TyUnion || st->u.t_struct.nfield == 0); + + st->kind = TyUnion; + st->size = size; + st->u.t_struct.nfield = 0; + st->u.t_struct.nfieldalloc = nfields; + if (nfields != 0) + st->u.t_struct.fields = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(StField) * nfields); + else + st->u.t_struct.fields = NULL; + + return st; +} + +void ML_(st_addfield)(SymType *st, Char *name, SymType *type, UInt off, UInt size) +{ + StField *f; + + vg_assert(st->kind == TyStruct || st->kind == TyUnion); + + if (st->u.t_struct.nfieldalloc == st->u.t_struct.nfield) { + StField *n = VG_(arena_malloc)(VG_AR_SYMTAB, + sizeof(StField) * (st->u.t_struct.nfieldalloc + 2)); + VG_(memcpy)(n, st->u.t_struct.fields, sizeof(*n) * st->u.t_struct.nfield); + if (st->u.t_struct.fields != NULL) + VG_(arena_free)(VG_AR_SYMTAB, st->u.t_struct.fields); + st->u.t_struct.nfieldalloc++; + st->u.t_struct.fields = n; + } + + f = &st->u.t_struct.fields[st->u.t_struct.nfield++]; + f->name = name; + f->type = type; + f->offset = off; + f->size = size; +} + + +SymType *ML_(st_mkenum)(SymType *st, UInt ntags) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown || st->kind == TyEnum); 
+ + st->kind = TyEnum; + st->u.t_enum.ntag = 0; + st->u.t_enum.tags = NULL; + + return st; +} + +SymType *ML_(st_mkarray)(SymType *st, SymType *idxtype, SymType *type) +{ + st = alloc(st); + + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown); + + st->kind = TyArray; + st->u.t_array.type = type; + st->u.t_array.idxtype = idxtype; + + return st; +} + +SymType *ML_(st_mktypedef)(SymType *st, Char *name, SymType *type) +{ + st = alloc(st); + + vg_assert(st != type); + vg_assert(st->kind == TyUnresolved || st->kind == TyUnknown || + st->kind == TyStruct || st->kind == TyUnion || + st->kind == TyTypedef); + + st->kind = TyTypedef; + st->name = name; + st->u.t_typedef.type = type; + + return st; +} + + +SymType *ML_(st_basetype)(SymType *type, Bool do_resolve) +{ + while (type->kind == TyTypedef || (do_resolve && type->kind == TyUnresolved)) { + if (do_resolve) + resolve(type); + + if (type->kind == TyTypedef) + type = type->u.t_typedef.type; + } + + return type; +} + +UInt ML_(st_sizeof)(SymType *ty) +{ + return ty->size; +} + +#ifndef TEST +/* + Hash of visited addresses, so we don't get stuck in loops. It isn't + simply enough to keep track of addresses, since we need to interpret + the memory according to the type. If a given location has multiple + pointers with different types (for example, void * and struct foo *), + then we need to look at it under each type. 
+*/ +struct visited { + Addr a; + SymType *ty; + struct visited *next; +}; + +#define VISIT_HASHSZ 1021 + +static struct visited *visit_hash[VISIT_HASHSZ]; + +static inline Bool test_visited(Addr a, SymType *type) +{ + struct visited *v; + UInt b = (UInt)a % VISIT_HASHSZ; + Bool ret = False; + + for(v = visit_hash[b]; v != NULL; v = v->next) { + if (v->a == a && v->ty == type) { + ret = True; + break; + } + } + + return ret; +} + +static Bool has_visited(Addr a, SymType *type) +{ + static const Bool debug = False; + Bool ret; + + ret = test_visited(a, type); + + if (!ret) { + UInt b = (UInt)a % VISIT_HASHSZ; + struct visited * v = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*v)); + + v->a = a; + v->ty = type; + v->next = visit_hash[b]; + visit_hash[b] = v; + } + + if (debug) + VG_(printf)("has_visited(a=%p, ty=%p) -> %d\n", a, type, ret); + + return ret; +} + +static void clear_visited(void) +{ + UInt i; + + for(i = 0; i < VISIT_HASHSZ; i++) { + struct visited *v, *n; + for(v = visit_hash[i]; v != NULL; v = n) { + n = v->next; + VG_(arena_free)(VG_AR_SYMTAB, v); + } + visit_hash[i] = NULL; + } +} + +static +void bprintf(void (*send)(HChar, void*), void *send_arg, const Char *fmt, ...) +{ + va_list vargs; + + va_start(vargs, fmt); + VG_(debugLog_vprintf)(send, send_arg, fmt, vargs); + va_end(vargs); +} + +#define SHADOWCHUNK 0 /* no longer have a core allocator */ + +#if SHADOWCHUNK +static ShadowChunk *findchunk(Addr a) +{ + Bool find(ShadowChunk *sc) { + return a >= sc->data && a < (sc->data+sc->size); + } + return VG_(any_matching_mallocd_ShadowChunks)(find); +} +#endif + +static struct vki_sigaction sigbus_saved; +static struct vki_sigaction sigsegv_saved; +static vki_sigset_t blockmask_saved; +static jmp_buf valid_addr_jmpbuf; + +static void valid_addr_handler(int sig) +{ + //VG_(printf)("OUCH! 
%d\n", sig); + __builtin_longjmp(valid_addr_jmpbuf, 1); +} + +/* catch badness signals because we're going to be + playing around in untrusted memory */ +static void setup_signals(void) +{ + Int res; + struct vki_sigaction sigbus_new; + struct vki_sigaction sigsegv_new; + vki_sigset_t unblockmask_new; + + /* Temporarily install a new sigsegv and sigbus handler, and make + sure SIGBUS, SIGSEGV and SIGTERM are unblocked. (Perhaps the + first two can never be blocked anyway?) */ + + sigbus_new.ksa_handler = valid_addr_handler; + sigbus_new.sa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sigbus_new.sa_restorer = NULL; + res = VG_(sigemptyset)( &sigbus_new.sa_mask ); + vg_assert(res == 0); + + sigsegv_new.ksa_handler = valid_addr_handler; + sigsegv_new.sa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; + sigsegv_new.sa_restorer = NULL; + res = VG_(sigemptyset)( &sigsegv_new.sa_mask ); + vg_assert(res == 0+0); + + res = VG_(sigemptyset)( &unblockmask_new ); + res |= VG_(sigaddset)( &unblockmask_new, VKI_SIGBUS ); + res |= VG_(sigaddset)( &unblockmask_new, VKI_SIGSEGV ); + res |= VG_(sigaddset)( &unblockmask_new, VKI_SIGTERM ); + vg_assert(res == 0+0+0); + + res = VG_(sigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved ); + vg_assert(res == 0+0+0+0); + + res = VG_(sigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved ); + vg_assert(res == 0+0+0+0+0); + + res = VG_(sigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved ); + vg_assert(res == 0+0+0+0+0+0); +} + +static void restore_signals(void) +{ + Int res; + + /* Restore signal state to whatever it was before. 
*/ + res = VG_(sigaction)( VKI_SIGBUS, &sigbus_saved, NULL ); + vg_assert(res == 0 +0); + + res = VG_(sigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL ); + vg_assert(res == 0 +0 +0); + + res = VG_(sigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL ); + vg_assert(res == 0 +0 +0 +0); +} + +/* if false, setup and restore signals for every access */ +#define LAZYSIG 1 + +static Bool is_valid_addr(Addr a) +{ + static SymType faulted = { TyError }; + static const Bool debug = False; + volatile Bool ret = False; + + if ((a > VKI_PAGE_SIZE) && !test_visited(a, &faulted)) { + if (!LAZYSIG) + setup_signals(); + + if (__builtin_setjmp(valid_addr_jmpbuf) == 0) { + volatile UInt *volatile ptr = (volatile UInt *)a; + + *ptr; + + ret = True; + } else { + /* cache bad addresses in visited table */ + has_visited(a, &faulted); + ret = False; + } + + if (!LAZYSIG) + restore_signals(); + } + + if (debug) + VG_(printf)("is_valid_addr(%p) -> %d\n", a, ret); + + return ret; +} + +static Int free_varlist(Variable *list) +{ + Variable *next; + Int count = 0; + + for(; list != NULL; list = next) { + next = list->next; + count++; + if (list->name) + VG_(arena_free)(VG_AR_SYMTAB, list->name); + VG_(arena_free)(VG_AR_SYMTAB, list); + } + return count; +} + +/* Composite: struct, union, array + Non-composite: everything else + */ +static inline Bool is_composite(SymType *ty) +{ + switch(ty->kind) { + case TyUnion: + case TyStruct: + case TyArray: + return True; + + default: + return False; + } +} + +/* There's something at the end of the rainbow */ +static inline Bool is_followable(SymType *ty) +{ + return ty->kind == TyPointer || is_composite(ty); +} + +/* Result buffer */ +static Char *describe_addr_buf; +static UInt describe_addr_bufidx; +static UInt describe_addr_bufsz; + +/* Add a character to the result buffer */ +static void describe_addr_addbuf(HChar c,void *p) { + if ((describe_addr_bufidx+1) >= describe_addr_bufsz) { + Char *n; + + if (describe_addr_bufsz == 0) + describe_addr_bufsz = 
8; + else + describe_addr_bufsz *= 2; + + /* use tool malloc so that the tool can free it */ + n = VG_(malloc)(describe_addr_bufsz); + if (describe_addr_buf != NULL && describe_addr_bufidx != 0) + VG_(memcpy)(n, describe_addr_buf, describe_addr_bufidx); + if (describe_addr_buf != NULL) + VG_(free)(describe_addr_buf); + describe_addr_buf = n; + } + describe_addr_buf[describe_addr_bufidx++] = c; + describe_addr_buf[describe_addr_bufidx] = '\0'; +} + +#define MAX_PLY 7 /* max depth we go */ +#define MAX_ELEMENTS 5000 /* max number of array elements we scan */ +#define MAX_VARS 10000 /* max number of variables total traversed */ + +static const Bool memaccount = False; /* match creates to frees */ +static const Bool debug = False; + +/* Add a new variable to the list */ +static Bool newvar(Char *name, SymType *ty, Addr valuep, UInt size, + Variable *var, Int *numvars, Int *created, + Variable **newlist, Variable **newlistend) { + Variable *v; + + /* have we been here before? */ + if (has_visited(valuep, ty)) + return False; + + /* are we too deep? */ + if (var->distance > MAX_PLY) + return False; + + /* have we done too much? */ + if ((*numvars)-- == 0) + return False; + + if (memaccount) + (*created)++; + + v = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(*v)); + + if (name) + v->name = VG_(arena_strdup)(VG_AR_SYMTAB, name); + else + v->name = NULL; + v->type = ML_(st_basetype)(ty, False); + v->valuep = valuep; + v->size = size == -1 ? ty->size : size; + v->container = var; + v->distance = var->distance + 1; + v->next = NULL; + + if (*newlist == NULL) + *newlist = *newlistend = v; + else { + (*newlistend)->next = v; + *newlistend = v; + } + + if (debug) + VG_(printf)(" --> %d: name=%s type=%p(%s %s) container=%p &val=%p\n", + v->distance, v->name, v->type, ppkind(v->type->kind), + v->type->name ? 
(char *)v->type->name : "", + v->container, v->valuep); + return True; +} + +static void genstring(Variable *v, Variable *inner, Int *len, Char **ep, + Char **sp) { + Variable *c = v->container; + + if (c != NULL) + genstring(c, v, len, ep, sp); + + if (v->name != NULL) { + *len = VG_(strlen)(v->name); + VG_(memcpy)(*ep, v->name, *len); + (*ep) += *len; + } + + switch(v->type->kind) { + case TyPointer: + /* pointer-to-structure/union handled specially */ + if (inner == NULL || + !(inner->type->kind == TyStruct || inner->type->kind == TyUnion)) { + *--(*sp) = '*'; + *--(*sp) = '('; + *(*ep)++ = ')'; + } + break; + + case TyStruct: + case TyUnion: + if (c && c->type->kind == TyPointer) { + *(*ep)++ = '-'; + *(*ep)++ = '>'; + } else + *(*ep)++ = '.'; + break; + + default: + break; + } +} + +Char *VG_(describe_addr)(ThreadId tid, Addr addr) +{ + Addr eip; /* thread's EIP */ + Variable *list; /* worklist */ + Variable *keeplist; /* container variables */ + Variable *found; /* the chain we found */ + Int created=0, freed=0; + Int numvars = MAX_VARS; + + describe_addr_buf = NULL; + describe_addr_bufidx = 0; + describe_addr_bufsz = 0; + + clear_visited(); + + found = NULL; + keeplist = NULL; + + eip = VG_(get_IP)(tid); + list = ML_(get_scope_variables)(tid); + + if (memaccount) { + Variable *v; + + for(v = list; v != NULL; v = v->next) + created++; + } + + if (debug) { + Char file[100]; + Int line; + if (!VG_(get_filename_linenum)(eip, file, sizeof(file), + NULL, 0, NULL, &line)) + file[0] = 0; + VG_(printf)("describing address %p for tid=%d @ %s:%d\n", addr, tid, file, line); + } + + if (LAZYSIG) + setup_signals(); + + /* breadth-first traversal of all memory visible to the program at + the current point */ + while(list != NULL && found == NULL) { + Variable **prev = &list; + Variable *var, *next; + Variable *newlist = NULL, *newlistend = NULL; + + if (debug) + VG_(printf)("----------------------------------------\n"); + + for(var = list; var != NULL; var = next) { + 
SymType *type = var->type; + Bool keep = False; + + next = var->next; + + if (debug) + VG_(printf)(" %d: name=%s type=%p(%s %s) container=%p &val=%p\n", + var->distance, var->name, + var->type, ppkind(var->type->kind), + var->type->name ? (char *)var->type->name : "", + var->container, var->valuep); + + if (0 && has_visited(var->valuep, var->type)) { + /* advance prev; we're keeping this one on the doomed list */ + prev = &var->next; + continue; + } + + if (!is_composite(var->type) && + addr >= var->valuep && addr < (var->valuep + var->size)) { + /* at hit - remove it from the list, add it to the + keeplist and set found */ + found = var; + *prev = var->next; + var->next = keeplist; + keeplist = var; + break; + } + + type = ML_(st_basetype)(type, True); + + switch(type->kind) { + case TyUnion: + case TyStruct: { + Int i; + + if (debug) + VG_(printf)(" %d fields\n", type->u.t_struct.nfield); + for(i = 0; i < type->u.t_struct.nfield; i++) { + StField *f = &type->u.t_struct.fields[i]; + if(newvar(f->name, f->type, var->valuep + (f->offset / 8), + (f->size + 7) / 8, var, &numvars, &created, &newlist, + &newlistend)) + keep = True; + } + break; + } + + case TyArray: { + Int i; + Int offset; /* offset of index for non-0-based arrays */ + Int min, max; /* range of indicies we care about (0 based) */ + SymType *ty = type->u.t_array.type; + vg_assert(type->u.t_array.idxtype->kind == TyRange); + + offset = type->u.t_array.idxtype->u.t_range.min; + min = 0; + max = type->u.t_array.idxtype->u.t_range.max - offset; + + if ((max-min+1) == 0) { +#if SHADOWCHUNK + /* zero-sized array - look at allocated memory */ + ShadowChunk *sc = findchunk(var->valuep); + + if (sc != NULL) { + max = ((sc->data + sc->size - var->valuep) / ty->size) + min; + if (debug) + VG_(printf)(" zero sized array: using min=%d max=%d\n", + min, max); + } +#endif + } + + /* If this array's elements can't take us anywhere useful, + just look to see if an element itself is being pointed + to; otherwise just 
skip the whole thing */ + if (!is_followable(ty)) { + UInt sz = ty->size * (max+1); + + if (debug) + VG_(printf)(" non-followable array (sz=%d): checking addr %p in range %p-%p\n", + sz, addr, var->valuep, (var->valuep + sz)); + if (ty->size > 0 && addr >= var->valuep && addr <= (var->valuep + sz)) + min = max = (addr - var->valuep) / ty->size; + else + break; + } + + /* truncate array if it's too big */ + if (max-min+1 > MAX_ELEMENTS) + max = min+MAX_ELEMENTS; + + if (debug) + VG_(printf)(" array index %d - %d\n", min, max); + for(i = min; i <= max; i++) { + Char b[10]; + VG_(sprintf)(b, "[%d]", i+offset); + if(newvar(b, ty, var->valuep + (i * ty->size), -1, var, + &numvars, &created, &newlist, &newlistend)) + keep = True; + } + + break; + } + + case TyPointer: + /* follow */ + /* XXX work out a way of telling whether a pointer is + actually a decayed array, and treat it accordingly */ + if (is_valid_addr(var->valuep)) + if(newvar(NULL, type->u.t_pointer.type, *(Addr *)var->valuep, + -1, var, &numvars, &created, &newlist, &newlistend)) + keep = True; + break; + + case TyUnresolved: + VG_(printf)("var %s is unresolved (type=%p)\n", var->name, type); + break; + + default: + /* Simple non-composite, non-pointer type */ + break; + } + + if (keep) { + /* ironically, keep means remove it from the list */ + *prev = next; + + /* being kept - add it if not already there */ + if (keeplist != var) { + var->next = keeplist; + keeplist = var; + } + } else { + /* advance prev; we're keeping it on the doomed list */ + prev = &var->next; + } + } + + /* kill old list */ + freed += free_varlist(list); + list = NULL; + + if (found) { + /* kill new list too */ + freed += free_varlist(newlist); + newlist = newlistend = NULL; + } else { + /* new list becomes old list */ + list = newlist; + } + } + + if (LAZYSIG) + restore_signals(); + + if (found != NULL) { + Int len = 0; + Char file[100]; + Int line; + + /* Try to generate an idiomatic C-like expression from what + we've found. 
*/ + + { + Variable *v; + for(v = found; v != NULL; v = v->container) { + if (debug) + VG_(printf)("v=%p (%s) %s\n", + v, v->name ? v->name : (Char *)"", + ppkind(v->type->kind)); + + len += (v->name ? VG_(strlen)(v->name) : 0) + 5; + } + } + + /* now that we know how long the expression will be + (approximately) build it up */ + { + Char expr[len*2]; + Char *sp = &expr[len]; /* pointer at start of string */ + Char *ep = sp; /* pointer at end of string */ + Bool ptr = True; + + /* If the result is already a pointer, just use that as the + value, otherwise generate &(...) around the expression. */ + if (found->container && found->container->type->kind == TyPointer) { + vg_assert(found->name == NULL); + + found->name = found->container->name; + found->container->name = NULL; + found->container = found->container->container; + } else { + bprintf(describe_addr_addbuf, 0, "&("); + ptr = False; + } + + genstring(found, NULL, &len, &ep, &sp); + + if (!ptr) + *ep++ = ')'; + + *ep++ = '\0'; + + bprintf(describe_addr_addbuf, 0, sp); + + if (addr != found->valuep) + bprintf(describe_addr_addbuf, 0, "+%d", addr - found->valuep); + + if (VG_(get_filename_linenum)(eip, file, sizeof(file), + NULL, 0, NULL, &line)) + bprintf(describe_addr_addbuf, 0, " at %s:%d", file, line, addr); + } + } + + freed += free_varlist(keeplist); + + if (memaccount) + VG_(printf)("created %d, freed %d\n", created, freed); + + clear_visited(); + + if (debug) + VG_(printf)("returning buf=%s\n", describe_addr_buf); + + return describe_addr_buf; +} +#endif /* TEST */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------*/ +/*--- Header for symbol table stuff. 
priv_symtab.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2005 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_SYMTAB_H +#define __PRIV_SYMTAB_H + +/* A structure to hold an ELF symbol (very crudely). */ +typedef + struct { + Addr addr; /* lowest address of entity */ + UInt size; /* size in bytes */ + Char *name; /* name */ + Addr tocptr; /* ppc64-linux only: value that R2 should have */ + } + RiSym; + +/* Line count at which overflow happens, due to line numbers being stored as + * shorts in `struct nlist' in a.out.h. */ +#define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) + +#define LINENO_BITS 20 +#define LOC_SIZE_BITS (32 - LINENO_BITS) +#define MAX_LINENO ((1 << LINENO_BITS) - 1) + +/* Unlikely to have any lines with instruction ranges > 4096 bytes */ +#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) + +/* Number used to detect line number overflows; if one line is 60000-odd + * smaller than the previous, is was probably an overflow. + */ +#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) + +/* A structure to hold addr-to-source info for a single line. 
There can be a + * lot of these, hence the dense packing. */ +typedef + struct { + /* Word 1 */ + Addr addr; /* lowest address for this line */ + /* Word 2 */ + UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */ + UInt lineno:LINENO_BITS; /* source line number, or zero */ + /* Word 3 */ + Char* filename; /* source filename */ + /* Word 4 */ + Char* dirname; /* source directory name */ + } + RiLoc; + + +/* A structure to hold a set of variables in a particular scope */ +typedef struct _Scope Scope; /* a set of symbols in one scope */ +typedef struct _Sym Sym; /* a single symbol */ +typedef struct _ScopeRange ScopeRange; /* a range of code addreses a scope covers */ + +typedef enum { + SyESPrel, /* on the stack (relative to ESP) */ + SyEBPrel, /* on the stack (relative to EBP) */ + SyReg, /* in a register */ + SyType, /* a type definition */ + SyStatic, /* a static variable */ + SyGlobal, /* a global variable (XXX any different to static + in an outer scope?) */ +} SyKind; + +struct _Sym { + SymType *type; /* type */ + Char *name; /* name */ + SyKind kind; /* kind of symbol */ + + /* a value, depending on kind */ + union { + OffT offset; /* offset on stack (-ve -> ebp; +ve -> esp) */ + Int regno; /* register number */ + Addr addr; /* static or global address */ + } u; +}; + +struct _Scope { + Scope *outer; /* outer (containing) scope */ + UInt nsyms; /* number of symbols in this scope */ + UInt depth; /* depth of scope */ + Sym *syms; /* the symbols */ +}; + +/* A structure to map a scope to a range of code addresses; scopes may + be broken into multiple ranges (before and after a nested scope) */ +struct _ScopeRange { + Addr addr; /* start address of this scope */ + Int size; /* length of scope */ + Scope *scope; /* symbols in scope */ +}; + +#define STRCHUNKSIZE (64*1024) + + +/* A structure to summarise CFI summary info for the code address + range [base .. base+len-1]. 
In short, if you know (sp,fp,ip) at + some point and ip is in the range [base .. base+len-1], it tells + you how to calculate (sp,fp) for the caller of the current + frame and also ra, the return address of the current frame. + + First off, calculate CFA, the Canonical Frame Address, thusly: + + cfa = if cfa_sprel then sp+cfa_off else fp+cfa_off + + Once that is done, the previous frame's sp/fp values and this + frame's ra value can be calculated like this: + + old_sp/fp/ra + = case sp/fp/ra_how of + CFIR_UNKNOWN -> we don't know, sorry + CFIR_SAME -> same as it was before (sp/fp only) + CFIR_CFAREL -> cfa + sp/fp/ra_off + CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) +*/ + +#define CFIR_UNKNOWN ((UChar)0) +#define CFIR_SAME ((UChar)1) +#define CFIR_CFAREL ((UChar)2) +#define CFIR_MEMCFAREL ((UChar)3) + +typedef + struct { + Addr base; + UInt len; + Bool cfa_sprel; + UChar ra_how; /* a CFIR_ value */ + UChar sp_how; /* a CFIR_ value */ + UChar fp_how; /* a CFIR_ value */ + Int cfa_off; + Int ra_off; + Int sp_off; + Int fp_off; + } + CfiSI; + +extern void ML_(ppCfiSI) ( CfiSI* ); + + +/* A structure which contains information pertaining to one mapped + text segment. This type is exported only abstractly - in + pub_tool_debuginfo.h. */ +struct _SegInfo { + struct _SegInfo* next; /* list of SegInfos */ + + Int ref; + + /* Description of the mapped segment. */ + Addr start; + UInt size; + Char* filename; /* in mallocville */ + OffT foffset; + Char* soname; + + /* An expandable array of symbols. */ + RiSym* symtab; + UInt symtab_used; + UInt symtab_size; + /* An expandable array of locations. */ + RiLoc* loctab; + UInt loctab_used; + UInt loctab_size; + /* An expandable array of scope ranges. */ + ScopeRange *scopetab; + UInt scopetab_used; + UInt scopetab_size; + /* An expandable array of CFI summary info records. Also includes + summary address bounds, showing the min and max address covered + by any of the records, as an aid to fast searching. 
*/ + CfiSI* cfisi; + UInt cfisi_used; + UInt cfisi_size; + Addr cfisi_minaddr; + Addr cfisi_maxaddr; + + /* Expandable arrays of characters -- the string table. + Pointers into this are stable (the arrays are not reallocated) + */ + struct strchunk { + UInt strtab_used; + struct strchunk *next; + Char strtab[STRCHUNKSIZE]; + } *strchunks; + + /* offset is what we need to add to symbol table entries + to get the real location of that symbol in memory. + */ + OffT offset; + + /* Bounds of data, BSS, PLT, GOT and OPD (for ppc64-linux) so that + tools can see what section an address is in. In the running image! */ + Addr plt_start_vma; + UInt plt_size; + Addr got_start_vma; + UInt got_size; + Addr opd_start_vma; + UInt opd_size; + Addr data_start_vma; + UInt data_size; + Addr bss_start_vma; + UInt bss_size; + + /* data used by stabs parser */ + struct _StabTypeTab *stab_typetab; +}; + +extern +Char *ML_(addStr) ( SegInfo* si, Char* str, Int len ); + +extern +void ML_(addScopeInfo) ( SegInfo* si, Addr this, Addr next, Scope *scope); + +extern +void ML_(addLineInfo) ( SegInfo* si, + Char* filename, + Char* dirname, /* NULL is allowable */ + Addr this, Addr next, Int lineno, Int entry); + +extern +void ML_(addCfiSI) ( SegInfo* si, CfiSI* cfisi ); + +/* Non-fatal -- use vg_panic if terminal. 
*/ +extern +void ML_(symerr) ( Char* msg ); + +/* -------------------- + Stabs reader + -------------------- */ +extern +void ML_(read_debuginfo_stabs) ( SegInfo* si, + UChar* stabC, Int stab_sz, + UChar* stabstr, Int stabstr_sz ); + +/* -------------------- + DWARF2 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf2) + ( SegInfo* si, + UChar* debuginfo, Int debug_info_sz, /* .debug_info */ + UChar* debugabbrev, /* .debug_abbrev */ + UChar* debugline, Int debug_line_sz, /* .debug_line */ + UChar* debugstr ); + +/* -------------------- + DWARF1 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf1) ( SegInfo* si, + UChar* dwarf1d, Int dwarf1d_sz, + UChar* dwarf1l, Int dwarf1l_sz ); + +/* -------------------- + CFI reader + -------------------- */ +extern +void ML_(read_callframe_info_dwarf2) + ( /*OUT*/SegInfo* si, UChar* ehframe, Int ehframe_sz, Addr ehframe_addr ); + + +#endif // __PRIV_SYMTAB_H + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------*/ +/*--- Intra-Valgrind interfaces for symtypes.c. priv_symtypes.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2005 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_SYMTYPES_H +#define __PRIV_SYMTYPES_H + +/* Let's try to make these opaque */ +typedef struct _SymType SymType; + +/* ------------------------------------------------------------ + Constructors for various SymType nodes + ------------------------------------------------------------ */ + +/* Find the basetype for a given type: that is, if type is a typedef, + return the typedef'd type. If resolve is true, it will resolve + unresolved symbols. If type is not a typedef, then this just + returns type. +*/ +SymType *ML_(st_basetype)(SymType *type, Bool resolve); + +void ML_(st_setname)(SymType *ty, Char *name); + +typedef void (SymResolver)(SymType *, void *); + +/* Create an unresolved type */ +SymType *ML_(st_mkunresolved)(SymType *, SymResolver *resolve, void *data); + +/* update an unresolved type's data */ +void ML_(st_unresolved_setdata)(SymType *, SymResolver *resolve, void *data); + +Bool ML_(st_isresolved)(SymType *); +UInt ML_(st_sizeof)(SymType *); + +/* Unknown type (unparsable) */ +SymType *ML_(st_mkunknown)(SymType *); + +SymType *ML_(st_mkvoid)(SymType *); + +SymType *ML_(st_mkint)(SymType *, UInt size, Bool isSigned); +SymType *ML_(st_mkbool)(SymType *, UInt size); +SymType *ML_(st_mkchar)(SymType *, Bool isSigned); +SymType *ML_(st_mkfloat)(SymType *, UInt size); +SymType *ML_(st_mkdouble)(SymType *, UInt size); + +SymType *ML_(st_mkpointer)(SymType *, SymType *); +SymType *ML_(st_mkrange)(SymType *, SymType *, Int min, Int max); + +SymType *ML_(st_mkstruct)(SymType *, UInt size, UInt nfields); +SymType *ML_(st_mkunion)(SymType *, UInt size, UInt nfields); +void ML_(st_addfield)(SymType *, Char *name, SymType *, UInt off, UInt size); + +SymType
*ML_(st_mkenum)(SymType *, UInt ntags); +SymType *ML_(st_addtag)(SymType *, Char *name, Int val); + +SymType *ML_(st_mkarray)(SymType *, SymType *idxtype, SymType *artype); + +SymType *ML_(st_mktypedef)(SymType *, Char *name, SymType *type); + +Bool ML_(st_isstruct)(SymType *); +Bool ML_(st_isunion)(SymType *); +Bool ML_(st_isenum)(SymType *); + +/* ------------------------------------------------------------ + Interface with symtab.c + ------------------------------------------------------------ */ + +/* Typed value */ +typedef struct _Variable Variable; + +struct _Variable { + Char *name; /* name */ + SymType *type; /* type of value */ + Addr valuep; /* pointer to value */ + UInt size; /* size of value */ + UInt distance; /* "distance" from site of interest */ + Variable *next; + Variable *container; +}; + +Variable *ML_(get_scope_variables)(ThreadId tid); + +#endif // __PRIV_SYMTYPES_H + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/d3basics.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/d3basics.c.svn-base new file mode 100644 index 0000000..8969cdc --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/d3basics.c.svn-base @@ -0,0 +1,939 @@ + +/*--------------------------------------------------------------------*/ +/*--- Basic definitions and helper functions for DWARF3. ---*/ +/*--- d3basics.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_options.h" +#include "pub_core_xarray.h" + +#include "pub_core_vki.h" /* VKI_PROT_READ */ +#include "pub_core_aspacemgr.h" /* VG_(is_valid_for_client) */ + +#include "priv_misc.h" +#include "priv_d3basics.h" /* self */ +#include "priv_storage.h" + +HChar* ML_(pp_DW_children) ( DW_children hashch ) +{ + switch (hashch) { + case DW_children_no: return "no children"; + case DW_children_yes: return "has children"; + default: return "DW_children_???"; + } +} + +HChar* ML_(pp_DW_TAG) ( DW_TAG tag ) +{ + switch (tag) { + case DW_TAG_padding: return "DW_TAG_padding"; + case DW_TAG_array_type: return "DW_TAG_array_type"; + case DW_TAG_class_type: return "DW_TAG_class_type"; + case DW_TAG_entry_point: return "DW_TAG_entry_point"; + case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type"; + case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter"; + case DW_TAG_imported_declaration: + return "DW_TAG_imported_declaration"; + case DW_TAG_label: return "DW_TAG_label"; + case DW_TAG_lexical_block: return "DW_TAG_lexical_block"; + case DW_TAG_member: return "DW_TAG_member"; + 
case DW_TAG_pointer_type: return "DW_TAG_pointer_type"; + case DW_TAG_reference_type: return "DW_TAG_reference_type"; + case DW_TAG_compile_unit: return "DW_TAG_compile_unit"; + case DW_TAG_string_type: return "DW_TAG_string_type"; + case DW_TAG_structure_type: return "DW_TAG_structure_type"; + case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type"; + case DW_TAG_typedef: return "DW_TAG_typedef"; + case DW_TAG_union_type: return "DW_TAG_union_type"; + case DW_TAG_unspecified_parameters: + return "DW_TAG_unspecified_parameters"; + case DW_TAG_variant: return "DW_TAG_variant"; + case DW_TAG_common_block: return "DW_TAG_common_block"; + case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion"; + case DW_TAG_inheritance: return "DW_TAG_inheritance"; + case DW_TAG_inlined_subroutine: + return "DW_TAG_inlined_subroutine"; + case DW_TAG_module: return "DW_TAG_module"; + case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type"; + case DW_TAG_set_type: return "DW_TAG_set_type"; + case DW_TAG_subrange_type: return "DW_TAG_subrange_type"; + case DW_TAG_with_stmt: return "DW_TAG_with_stmt"; + case DW_TAG_access_declaration: return "DW_TAG_access_declaration"; + case DW_TAG_base_type: return "DW_TAG_base_type"; + case DW_TAG_catch_block: return "DW_TAG_catch_block"; + case DW_TAG_const_type: return "DW_TAG_const_type"; + case DW_TAG_constant: return "DW_TAG_constant"; + case DW_TAG_enumerator: return "DW_TAG_enumerator"; + case DW_TAG_file_type: return "DW_TAG_file_type"; + case DW_TAG_friend: return "DW_TAG_friend"; + case DW_TAG_namelist: return "DW_TAG_namelist"; + case DW_TAG_namelist_item: return "DW_TAG_namelist_item"; + case DW_TAG_packed_type: return "DW_TAG_packed_type"; + case DW_TAG_subprogram: return "DW_TAG_subprogram"; + case DW_TAG_template_type_param: + return "DW_TAG_template_type_param"; + case DW_TAG_template_value_param: + return "DW_TAG_template_value_param"; + case DW_TAG_thrown_type: return "DW_TAG_thrown_type"; + case 
DW_TAG_try_block: return "DW_TAG_try_block"; + case DW_TAG_variant_part: return "DW_TAG_variant_part"; + case DW_TAG_variable: return "DW_TAG_variable"; + case DW_TAG_volatile_type: return "DW_TAG_volatile_type"; + /* DWARF 3. */ + case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure"; + case DW_TAG_restrict_type: return "DW_TAG_restrict_type"; + case DW_TAG_interface_type: return "DW_TAG_interface_type"; + case DW_TAG_namespace: return "DW_TAG_namespace"; + case DW_TAG_imported_module: return "DW_TAG_imported_module"; + case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type"; + case DW_TAG_partial_unit: return "DW_TAG_partial_unit"; + case DW_TAG_imported_unit: return "DW_TAG_imported_unit"; + case DW_TAG_condition: return "DW_TAG_condition"; + case DW_TAG_shared_type: return "DW_TAG_shared_type"; + /* SGI/MIPS Extensions. */ + case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop"; + /* HP extensions. See: + ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz . */ + case DW_TAG_HP_array_descriptor: + return "DW_TAG_HP_array_descriptor"; + /* GNU extensions. */ + case DW_TAG_format_label: return "DW_TAG_format_label"; + case DW_TAG_function_template: return "DW_TAG_function_template"; + case DW_TAG_class_template: return "DW_TAG_class_template"; + case DW_TAG_GNU_BINCL: return "DW_TAG_GNU_BINCL"; + case DW_TAG_GNU_EINCL: return "DW_TAG_GNU_EINCL"; + /* Extensions for UPC. See: http://upc.gwu.edu/~upc. */ + case DW_TAG_upc_shared_type: return "DW_TAG_upc_shared_type"; + case DW_TAG_upc_strict_type: return "DW_TAG_upc_strict_type"; + case DW_TAG_upc_relaxed_type: return "DW_TAG_upc_relaxed_type"; + /* PGI (STMicroelectronics) extensions. No documentation available. 
*/ + case DW_TAG_PGI_kanji_type: return "DW_TAG_PGI_kanji_type"; + case DW_TAG_PGI_interface_block: + return "DW_TAG_PGI_interface_block"; + default: return "DW_TAG_???"; + } +} + +HChar* ML_(pp_DW_FORM) ( DW_FORM form ) +{ + switch (form) { + case DW_FORM_addr: return "DW_FORM_addr"; + case DW_FORM_block2: return "DW_FORM_block2"; + case DW_FORM_block4: return "DW_FORM_block4"; + case DW_FORM_data2: return "DW_FORM_data2"; + case DW_FORM_data4: return "DW_FORM_data4"; + case DW_FORM_data8: return "DW_FORM_data8"; + case DW_FORM_string: return "DW_FORM_string"; + case DW_FORM_block: return "DW_FORM_block"; + case DW_FORM_block1: return "DW_FORM_block1"; + case DW_FORM_data1: return "DW_FORM_data1"; + case DW_FORM_flag: return "DW_FORM_flag"; + case DW_FORM_sdata: return "DW_FORM_sdata"; + case DW_FORM_strp: return "DW_FORM_strp"; + case DW_FORM_udata: return "DW_FORM_udata"; + case DW_FORM_ref_addr: return "DW_FORM_ref_addr"; + case DW_FORM_ref1: return "DW_FORM_ref1"; + case DW_FORM_ref2: return "DW_FORM_ref2"; + case DW_FORM_ref4: return "DW_FORM_ref4"; + case DW_FORM_ref8: return "DW_FORM_ref8"; + case DW_FORM_ref_udata: return "DW_FORM_ref_udata"; + case DW_FORM_indirect: return "DW_FORM_indirect"; + default: return "DW_FORM_???"; + } +} + +HChar* ML_(pp_DW_AT) ( DW_AT attr ) +{ + switch (attr) { + case DW_AT_sibling: return "DW_AT_sibling"; + case DW_AT_location: return "DW_AT_location"; + case DW_AT_name: return "DW_AT_name"; + case DW_AT_ordering: return "DW_AT_ordering"; + case DW_AT_subscr_data: return "DW_AT_subscr_data"; + case DW_AT_byte_size: return "DW_AT_byte_size"; + case DW_AT_bit_offset: return "DW_AT_bit_offset"; + case DW_AT_bit_size: return "DW_AT_bit_size"; + case DW_AT_element_list: return "DW_AT_element_list"; + case DW_AT_stmt_list: return "DW_AT_stmt_list"; + case DW_AT_low_pc: return "DW_AT_low_pc"; + case DW_AT_high_pc: return "DW_AT_high_pc"; + case DW_AT_language: return "DW_AT_language"; + case DW_AT_member: return "DW_AT_member"; + 
case DW_AT_discr: return "DW_AT_discr"; + case DW_AT_discr_value: return "DW_AT_discr_value"; + case DW_AT_visibility: return "DW_AT_visibility"; + case DW_AT_import: return "DW_AT_import"; + case DW_AT_string_length: return "DW_AT_string_length"; + case DW_AT_common_reference: return "DW_AT_common_reference"; + case DW_AT_comp_dir: return "DW_AT_comp_dir"; + case DW_AT_const_value: return "DW_AT_const_value"; + case DW_AT_containing_type: return "DW_AT_containing_type"; + case DW_AT_default_value: return "DW_AT_default_value"; + case DW_AT_inline: return "DW_AT_inline"; + case DW_AT_is_optional: return "DW_AT_is_optional"; + case DW_AT_lower_bound: return "DW_AT_lower_bound"; + case DW_AT_producer: return "DW_AT_producer"; + case DW_AT_prototyped: return "DW_AT_prototyped"; + case DW_AT_return_addr: return "DW_AT_return_addr"; + case DW_AT_start_scope: return "DW_AT_start_scope"; + case DW_AT_stride_size: return "DW_AT_stride_size"; + case DW_AT_upper_bound: return "DW_AT_upper_bound"; + case DW_AT_abstract_origin: return "DW_AT_abstract_origin"; + case DW_AT_accessibility: return "DW_AT_accessibility"; + case DW_AT_address_class: return "DW_AT_address_class"; + case DW_AT_artificial: return "DW_AT_artificial"; + case DW_AT_base_types: return "DW_AT_base_types"; + case DW_AT_calling_convention: return "DW_AT_calling_convention"; + case DW_AT_count: return "DW_AT_count"; + case DW_AT_data_member_location: return "DW_AT_data_member_location"; + case DW_AT_decl_column: return "DW_AT_decl_column"; + case DW_AT_decl_file: return "DW_AT_decl_file"; + case DW_AT_decl_line: return "DW_AT_decl_line"; + case DW_AT_declaration: return "DW_AT_declaration"; + case DW_AT_discr_list: return "DW_AT_discr_list"; + case DW_AT_encoding: return "DW_AT_encoding"; + case DW_AT_external: return "DW_AT_external"; + case DW_AT_frame_base: return "DW_AT_frame_base"; + case DW_AT_friend: return "DW_AT_friend"; + case DW_AT_identifier_case: return "DW_AT_identifier_case"; + case 
DW_AT_macro_info: return "DW_AT_macro_info"; + case DW_AT_namelist_items: return "DW_AT_namelist_items"; + case DW_AT_priority: return "DW_AT_priority"; + case DW_AT_segment: return "DW_AT_segment"; + case DW_AT_specification: return "DW_AT_specification"; + case DW_AT_static_link: return "DW_AT_static_link"; + case DW_AT_type: return "DW_AT_type"; + case DW_AT_use_location: return "DW_AT_use_location"; + case DW_AT_variable_parameter: return "DW_AT_variable_parameter"; + case DW_AT_virtuality: return "DW_AT_virtuality"; + case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location"; + /* DWARF 3 values. */ + case DW_AT_allocated: return "DW_AT_allocated"; + case DW_AT_associated: return "DW_AT_associated"; + case DW_AT_data_location: return "DW_AT_data_location"; + case DW_AT_stride: return "DW_AT_stride"; + case DW_AT_entry_pc: return "DW_AT_entry_pc"; + case DW_AT_use_UTF8: return "DW_AT_use_UTF8"; + case DW_AT_extension: return "DW_AT_extension"; + case DW_AT_ranges: return "DW_AT_ranges"; + case DW_AT_trampoline: return "DW_AT_trampoline"; + case DW_AT_call_column: return "DW_AT_call_column"; + case DW_AT_call_file: return "DW_AT_call_file"; + case DW_AT_call_line: return "DW_AT_call_line"; + case DW_AT_description: return "DW_AT_description"; + case DW_AT_binary_scale: return "DW_AT_binary_scale"; + case DW_AT_decimal_scale: return "DW_AT_decimal_scale"; + case DW_AT_small: return "DW_AT_small"; + case DW_AT_decimal_sign: return "DW_AT_decimal_sign"; + case DW_AT_digit_count: return "DW_AT_digit_count"; + case DW_AT_picture_string: return "DW_AT_picture_string"; + case DW_AT_mutable: return "DW_AT_mutable"; + case DW_AT_threads_scaled: return "DW_AT_threads_scaled"; + case DW_AT_explicit: return "DW_AT_explicit"; + case DW_AT_object_pointer: return "DW_AT_object_pointer"; + case DW_AT_endianity: return "DW_AT_endianity"; + case DW_AT_elemental: return "DW_AT_elemental"; + case DW_AT_pure: return "DW_AT_pure"; + case DW_AT_recursive: return 
"DW_AT_recursive"; + /* SGI/MIPS extensions. */ + /* case DW_AT_MIPS_fde: return "DW_AT_MIPS_fde"; */ + /* DW_AT_MIPS_fde == DW_AT_HP_unmodifiable */ + case DW_AT_MIPS_loop_begin: return "DW_AT_MIPS_loop_begin"; + case DW_AT_MIPS_tail_loop_begin: return "DW_AT_MIPS_tail_loop_begin"; + case DW_AT_MIPS_epilog_begin: return "DW_AT_MIPS_epilog_begin"; + case DW_AT_MIPS_loop_unroll_factor: return "DW_AT_MIPS_loop_unroll_factor"; + case DW_AT_MIPS_software_pipeline_depth: return "DW_AT_MIPS_software_pipeline_depth"; + case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name"; + case DW_AT_MIPS_stride: return "DW_AT_MIPS_stride"; + case DW_AT_MIPS_abstract_name: return "DW_AT_MIPS_abstract_name"; + case DW_AT_MIPS_clone_origin: return "DW_AT_MIPS_clone_origin"; + case DW_AT_MIPS_has_inlines: return "DW_AT_MIPS_has_inlines"; + /* HP extensions. */ + case DW_AT_HP_block_index: return "DW_AT_HP_block_index"; + case DW_AT_HP_unmodifiable: return "DW_AT_HP_unmodifiable"; + case DW_AT_HP_actuals_stmt_list: return "DW_AT_HP_actuals_stmt_list"; + case DW_AT_HP_proc_per_section: return "DW_AT_HP_proc_per_section"; + case DW_AT_HP_raw_data_ptr: return "DW_AT_HP_raw_data_ptr"; + case DW_AT_HP_pass_by_reference: return "DW_AT_HP_pass_by_reference"; + case DW_AT_HP_opt_level: return "DW_AT_HP_opt_level"; + case DW_AT_HP_prof_version_id: return "DW_AT_HP_prof_version_id"; + case DW_AT_HP_opt_flags: return "DW_AT_HP_opt_flags"; + case DW_AT_HP_cold_region_low_pc: return "DW_AT_HP_cold_region_low_pc"; + case DW_AT_HP_cold_region_high_pc: return "DW_AT_HP_cold_region_high_pc"; + case DW_AT_HP_all_variables_modifiable: return "DW_AT_HP_all_variables_modifiable"; + case DW_AT_HP_linkage_name: return "DW_AT_HP_linkage_name"; + case DW_AT_HP_prof_flags: return "DW_AT_HP_prof_flags"; + /* GNU extensions. 
*/ + case DW_AT_sf_names: return "DW_AT_sf_names"; + case DW_AT_src_info: return "DW_AT_src_info"; + case DW_AT_mac_info: return "DW_AT_mac_info"; + case DW_AT_src_coords: return "DW_AT_src_coords"; + case DW_AT_body_begin: return "DW_AT_body_begin"; + case DW_AT_body_end: return "DW_AT_body_end"; + case DW_AT_GNU_vector: return "DW_AT_GNU_vector"; + /* VMS extensions. */ + case DW_AT_VMS_rtnbeg_pd_address: return "DW_AT_VMS_rtnbeg_pd_address"; + /* UPC extension. */ + case DW_AT_upc_threads_scaled: return "DW_AT_upc_threads_scaled"; + /* PGI (STMicroelectronics) extensions. */ + case DW_AT_PGI_lbase: return "DW_AT_PGI_lbase"; + case DW_AT_PGI_soffset: return "DW_AT_PGI_soffset"; + case DW_AT_PGI_lstride: return "DW_AT_PGI_lstride"; + default: return "DW_AT_???"; + } +} + + +/* ------ To do with evaluation of Dwarf expressions ------ */ + +/* FIXME: duplicated in readdwarf.c */ +static +ULong read_leb128 ( UChar* data, Int* length_return, Int sign ) +{ + ULong result = 0; + UInt num_read = 0; + Int shift = 0; + UChar byte; + + vg_assert(sign == 0 || sign == 1); + + do + { + byte = * data ++; + num_read ++; + + result |= ((ULong)(byte & 0x7f)) << shift; + + shift += 7; + + } + while (byte & 0x80); + + if (length_return != NULL) + * length_return = num_read; + + if (sign && (shift < 64) && (byte & 0x40)) + result |= -(1ULL << shift); + + return result; +} + +/* Small helper functions easier to use + * value is returned and the given pointer is + * moved past end of leb128 data */ +/* FIXME: duplicated in readdwarf.c */ +static ULong read_leb128U( UChar **data ) +{ + Int len; + ULong val = read_leb128( *data, &len, 0 ); + *data += len; + return val; +} + +/* Same for signed data */ +/* FIXME: duplicated in readdwarf.c */ +static Long read_leb128S( UChar **data ) +{ + Int len; + ULong val = read_leb128( *data, &len, 1 ); + *data += len; + return (Long)val; +} + +/* FIXME: duplicates logic in readdwarf.c: copy_convert_CfiExpr_tree + and {FP,SP}_REG decls */ +static Bool 
get_Dwarf_Reg( /*OUT*/Addr* a, Word regno, RegSummary* regs ) +{ + vg_assert(regs); +# if defined(VGP_amd64_linux) + if (regno == 6/*RBP*/) { *a = regs->fp; return True; } + if (regno == 7/*RSP*/) { *a = regs->sp; return True; } +# elif defined(VGP_x86_linux) + if (regno == 5/*EBP*/) { *a = regs->fp; return True; } + if (regno == 4/*ESP*/) { *a = regs->sp; return True; } +# elif defined(VGP_ppc32_linux) + if (regno == 1/*SP*/) { *a = regs->sp; return True; } + if (regno == 31) return False; + vg_assert(0); +# elif defined(VGP_ppc64_linux) + if (regno == 1/*SP*/) { *a = regs->sp; return True; } + if (regno == 31) return False; + vg_assert(0); +# elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) + vg_assert(0); /* this function should never be called */ +# else +# error "Unknown platform" +# endif + return False; +} + +/* Convert a stated address to an actual address */ +static Bool bias_address( Addr* a, const DebugInfo* di ) +{ + if (di->text_present + && di->text_size > 0 + && *a >= di->text_debug_svma && *a < di->text_debug_svma + di->text_size) { + *a += di->text_debug_bias; + } + else if (di->data_present + && di->data_size > 0 + && *a >= di->data_debug_svma && *a < di->data_debug_svma + di->data_size) { + *a += di->data_debug_bias; + } + else if (di->sdata_present + && di->sdata_size > 0 + && *a >= di->sdata_debug_svma && *a < di->sdata_debug_svma + di->sdata_size) { + *a += di->sdata_debug_bias; + } + else if (di->rodata_present + && di->rodata_size > 0 + && *a >= di->rodata_debug_svma && *a < di->rodata_debug_svma + di->rodata_size) { + *a += di->rodata_debug_bias; + } + else if (di->bss_present + && di->bss_size > 0 + && *a >= di->bss_debug_svma && *a < di->bss_debug_svma + di->bss_size) { + *a += di->bss_debug_bias; + } + else if (di->sbss_present + && di->sbss_size > 0 + && *a >= di->sbss_debug_svma && *a < di->sbss_debug_svma + di->sbss_size) { + *a += di->sbss_debug_bias; + } + else { + return False; + } + + return True; +} + + +/* Evaluate a 
standard DWARF3 expression. See detailed description in + priv_d3basics.h. */ +GXResult ML_(evaluate_Dwarf3_Expr) ( UChar* expr, UWord exprszB, + GExpr* fbGX, RegSummary* regs, + const DebugInfo* di, + Bool push_initial_zero ) +{ +# define N_EXPR_STACK 20 + +# define FAIL(_str) \ + do { \ + res.kind = GXR_Failure; \ + res.word = (UWord)(_str); \ + return res; \ + } while (0) + +# define PUSH(_arg) \ + do { \ + vg_assert(sp >= -1 && sp < N_EXPR_STACK); \ + if (sp == N_EXPR_STACK-1) \ + FAIL("evaluate_Dwarf3_Expr: stack overflow(1)"); \ + sp++; \ + stack[sp] = (_arg); \ + } while (0) + +# define POP(_lval) \ + do { \ + vg_assert(sp >= -1 && sp < N_EXPR_STACK); \ + if (sp == -1) \ + FAIL("evaluate_Dwarf3_Expr: stack underflow(1)"); \ + _lval = stack[sp]; \ + sp--; \ + } while (0) + + UChar opcode; + UChar* limit; + Int sp; /* # of top element: valid is -1 .. N_EXPR_STACK-1 */ + Addr stack[N_EXPR_STACK]; /* stack of addresses, as per D3 spec */ + GXResult fbval, res; + Addr a1; + Word sw1; + UWord uw1; + Bool ok; + + sp = -1; + vg_assert(expr); + vg_assert(exprszB >= 0); + limit = expr + exprszB; + + /* Deal with the case where the entire expression is a single + Register Name Operation (D3 spec sec 2.6.1). Then the + denotation of the expression as a whole is a register name. */ + if (exprszB == 1 + && expr[0] >= DW_OP_reg0 && expr[0] <= DW_OP_reg31) { + res.kind = GXR_RegNo; + res.word = (UWord)(expr[0] - DW_OP_reg0); + return res; + } + if (exprszB > 1 + && expr[0] == DW_OP_regx) { + /* JRS: 2008Feb20: I believe the following is correct, but would + like to see a test case show up before enabling it. */ + expr++; + res.kind = GXR_RegNo; + res.word = (UWord)read_leb128U( &expr ); + if (expr != limit) + FAIL("evaluate_Dwarf3_Expr: DW_OP_regx*: invalid expr size"); + else + return res; + /*NOTREACHED*/ + } + + /* Evidently this expression denotes a value, not a register name. + So evaluate it accordingly. 
*/ + + if (push_initial_zero) + PUSH(0); + + while (True) { + + vg_assert(sp >= -1 && sp < N_EXPR_STACK); + + if (expr > limit) + /* overrun - something's wrong */ + FAIL("evaluate_Dwarf3_Expr: ran off end of expr"); + + if (expr == limit) { + /* end of expr - return expr on the top of stack. */ + if (sp == -1) + /* stack empty. Bad. */ + FAIL("evaluate_Dwarf3_Expr: stack empty at end of expr"); + else + break; + } + + opcode = *expr++; + switch (opcode) { + case DW_OP_addr: + /* Presumably what is given in the Dwarf3 is a SVMA (how + could it be otherwise?) So we add the appropriate bias + on before pushing the result. */ + a1 = *(Addr*)expr; + if (bias_address(&a1, di)) { + PUSH( a1 ); + expr += sizeof(Addr); + } + else { + FAIL("evaluate_Dwarf3_Expr: DW_OP_addr with address " + "in unknown section"); + } + break; + case DW_OP_fbreg: + if (!fbGX) + FAIL("evaluate_Dwarf3_Expr: DW_OP_fbreg with " + "no expr for fbreg present"); + fbval = ML_(evaluate_GX)(fbGX, NULL, regs, di); + /* Convert fbval into something we can use. If we got a + Value, no problem. However, as per D3 spec sec 3.3.5 + (Low Level Information) sec 2, we could also get a + RegNo, and that is taken to mean the value in the + indicated register. So we have to manually + "dereference" it. */ + a1 = 0; + switch (fbval.kind) { + case GXR_Failure: + return fbval; /* propagate failure */ + case GXR_Value: + a1 = fbval.word; break; /* use as-is */ + case GXR_RegNo: + ok = get_Dwarf_Reg( &a1, fbval.word, regs ); + if (!ok) return fbval; /* propagate failure */ + break; + default: + vg_assert(0); + } + sw1 = (Word)read_leb128S( &expr ); + PUSH( a1 + sw1 ); + break; + /* DW_OP_breg* denotes 'contents of specified register, plus + constant offset'. So provided we know what the register's + value is, we can evaluate this. Contrast DW_OP_reg*, + which indicates that denoted location is in a register + itself. 
If DW_OP_reg* shows up here the expression is + malformed, since we are evaluating for value now, and + DW_OP_reg* denotes a register location, not a value. See + D3 Spec sec 2.6.1 ("Register Name Operations") for + details. */ + case DW_OP_breg0 ... DW_OP_breg31: + if (!regs) + FAIL("evaluate_Dwarf3_Expr: DW_OP_breg* but no reg info"); + a1 = 0; + if (!get_Dwarf_Reg( &a1, opcode - DW_OP_breg0, regs )) + FAIL("evaluate_Dwarf3_Expr: unhandled DW_OP_breg*"); + sw1 = (Word)read_leb128S( &expr ); + a1 += sw1; + PUSH( a1 ); + break; + /* As per comment on DW_OP_breg*, the following denote that + the value in question is in a register, not in memory. So + we simply return failure. (iow, the expression is + malformed). */ + case DW_OP_reg0 ... DW_OP_reg31: + FAIL("evaluate_Dwarf3_Expr: DW_OP_reg* " + "whilst evaluating for a value"); + break; + case DW_OP_plus_uconst: + POP(uw1); + uw1 += (UWord)read_leb128U( &expr ); + PUSH(uw1); + break; + case DW_OP_GNU_push_tls_address: + /* GDB contains the following cryptic comment: */ + /* Variable is at a constant offset in the thread-local + storage block into the objfile for the current thread and + the dynamic linker module containing this expression. Here + we return returns the offset from that base. The top of the + stack has the offset from the beginning of the thread + control block at which the variable is located. Nothing + should follow this operator, so the top of stack would be + returned. */ + /* But no spec resulting from Googling. Punt for now. 
*/ + FAIL("warning: evaluate_Dwarf3_Expr: unhandled " + "DW_OP_GNU_push_tls_address"); + /*NOTREACHED*/ + case DW_OP_deref: + POP(uw1); + if (VG_(am_is_valid_for_client)( (Addr)uw1, sizeof(Addr), + VKI_PROT_READ )) { + uw1 = *(UWord*)uw1; + PUSH(uw1); + } else { + FAIL("warning: evaluate_Dwarf3_Expr: DW_OP_deref: " + "address not valid for client"); + } + break; + default: + if (!VG_(clo_xml)) + VG_(message)(Vg_DebugMsg, + "warning: evaluate_Dwarf3_Expr: unhandled " + "DW_OP_ 0x%x", (Int)opcode); + FAIL("evaluate_Dwarf3_Expr: unhandled DW_OP_"); + /*NOTREACHED*/ + } + + } + + vg_assert(sp >= 0 && sp < N_EXPR_STACK); + res.word = stack[sp]; + res.kind = GXR_Value; + return res; + +# undef POP +# undef PUSH +# undef FAIL +# undef N_EXPR_STACK +} + + +/* Evaluate a so-called Guarded (DWARF3) expression. See detailed + description in priv_d3basics.h. */ +GXResult ML_(evaluate_GX)( GExpr* gx, GExpr* fbGX, + RegSummary* regs, const DebugInfo* di ) +{ + GXResult res; + Addr aMin, aMax; + UChar uc; + UShort nbytes; + UWord nGuards = 0; + UChar* p = &gx->payload[0]; + uc = *p++; /*biasMe*/ + vg_assert(uc == 0 || uc == 1); + /* in fact it's senseless to evaluate if the guards need biasing. + So don't. */ + vg_assert(uc == 0); + while (True) { + uc = *p++; + if (uc == 1) { /*isEnd*/ + /* didn't find any matching range. */ + res.kind = GXR_Failure; + res.word = (UWord)"no matching range"; + return res; + } + vg_assert(uc == 0); + aMin = * (Addr*)p; p += sizeof(Addr); + aMax = * (Addr*)p; p += sizeof(Addr); + nbytes = * (UShort*)p; p += sizeof(UShort); + nGuards++; + if (0) VG_(printf)(" guard %d: %#lx %#lx\n", + (Int)nGuards, aMin,aMax); + if (regs == NULL) { + vg_assert(aMin == (Addr)0); + vg_assert(aMax == ~(Addr)0); + /* Assert this is the first guard. */ + vg_assert(nGuards == 1); + res = ML_(evaluate_Dwarf3_Expr)( + p, (UWord)nbytes, fbGX, regs, di, + False/*push_initial_zero*/ ); + /* Now check there are no more guards. 
*/ + p += (UWord)nbytes; + vg_assert(*p == 1); /*isEnd*/ + return res; + } else { + if (aMin <= regs->ip && regs->ip <= aMax) { + /* found a matching range. Evaluate the expression. */ + return ML_(evaluate_Dwarf3_Expr)( + p, (UWord)nbytes, fbGX, regs, di, + False/*push_initial_zero*/ ); + } + } + /* else keep searching */ + p += (UWord)nbytes; + } +} + + +/* Evaluate a very simple Guarded (DWARF3) expression. The expression + is expected to denote a constant, with no reference to any + registers nor to any frame base expression. The expression is + expected to have at least one guard. If there is more than one + guard, all the sub-expressions are evaluated and compared. The + address ranges on the guards are ignored. GXR_Failure is returned + in the following circumstances: + * no guards + * any of the subexpressions require a frame base expression + * any of the subexpressions denote a register location + * any of the subexpressions do not produce a manifest constant + * there's more than one subexpression, all of which successfully + evaluate to a constant, but they don't all produce the same constant. + JRS 23Jan09: the special-casing in this function is a nasty kludge. + Really it ought to be pulled out and turned into a general + constant- expression evaluator. +*/ +GXResult ML_(evaluate_trivial_GX)( GExpr* gx, const DebugInfo* di ) +{ + GXResult res; + Addr aMin, aMax; + UChar uc; + UShort nbytes; + Word i, nGuards; + MaybeULong *mul, *mul2; + + HChar* badness = NULL; + UChar* p = &gx->payload[0]; /* must remain unsigned */ + XArray* results = VG_(newXA)( ML_(dinfo_zalloc), "di.d3basics.etG.1", + ML_(dinfo_free), + sizeof(MaybeULong) ); + + uc = *p++; /*biasMe*/ + vg_assert(uc == 0 || uc == 1); + /* in fact it's senseless to evaluate if the guards need biasing. + So don't. 
*/ + vg_assert(uc == 0); + + nGuards = 0; + while (True) { + MaybeULong thisResult; + uc = *p++; + if (uc == 1) /*isEnd*/ + break; + vg_assert(uc == 0); + aMin = * (Addr*)p; p += sizeof(Addr); + aMax = * (Addr*)p; p += sizeof(Addr); + nbytes = * (UShort*)p; p += sizeof(UShort); + nGuards++; + if (0) VG_(printf)(" guard %ld: %#lx %#lx\n", + nGuards, aMin,aMax); + + thisResult.b = False; + thisResult.ul = 0; + + /* Peer at this particular subexpression, to see if it's + obviously a constant. */ + if (nbytes == 1 + sizeof(Addr) && *p == DW_OP_addr) { + /* DW_OP_addr a */ + Addr a = *(Addr*)(p+1); + if (bias_address(&a, di)) { + thisResult.b = True; + thisResult.ul = (ULong)a; + } else { + if (!badness) + badness = "trivial GExpr denotes constant address " + "in unknown section (1)"; + } + } + else + if (nbytes == 1 + sizeof(Addr) + 1 + 1 + /* 11 byte block: 3 c0 b6 2b 0 0 0 0 0 23 4 + (DW_OP_addr: 2bb6c0; DW_OP_plus_uconst: 4) + This is really a nasty kludge - only matches if the + trailing ULEB denotes a number in the range 0 .. 127 + inclusive. 
*/ + && p[0] == DW_OP_addr + && p[1 + sizeof(Addr)] == DW_OP_plus_uconst + && p[1 + sizeof(Addr) + 1] < 0x80 /*1-byte ULEB*/) { + Addr a = *(Addr*)&p[1]; + if (bias_address(&a, di)) { + thisResult.b = True; + thisResult.ul = (ULong)a + (ULong)p[1 + sizeof(Addr) + 1]; + } else { + if (!badness) + badness = "trivial GExpr denotes constant address " + "in unknown section (2)"; + } + } + else + if (nbytes == 2 + sizeof(Addr) + && *p == DW_OP_addr + && *(p + 1 + sizeof(Addr)) == DW_OP_GNU_push_tls_address) { + if (!badness) + badness = "trivial GExpr is DW_OP_addr plus trailing junk"; + } + else if (nbytes >= 1 && *p >= DW_OP_reg0 && *p <= DW_OP_reg31) { + if (!badness) + badness = "trivial GExpr denotes register (1)"; + } + else if (nbytes >= 1 && *p == DW_OP_fbreg) { + if (!badness) + badness = "trivial GExpr requires fbGX"; + } + else if (nbytes >= 1 && *p >= DW_OP_breg0 && *p <= DW_OP_breg31) { + if (!badness) + badness = "trivial GExpr requires register value"; + } + else if (nbytes >= 1 && *p == DW_OP_regx) { + if (!badness) + badness = "trivial GExpr denotes register (2)"; + } + else { + VG_(printf)(" ML_(evaluate_trivial_GX): unhandled:\n "); + ML_(pp_GX)( gx ); + VG_(printf)("\n"); + tl_assert(0); + } + + VG_(addToXA)( results, &thisResult ); + + p += (UWord)nbytes; + } + + res.kind = GXR_Failure; + + tl_assert(nGuards == VG_(sizeXA)( results )); + tl_assert(nGuards >= 0); + if (nGuards == 0) { + tl_assert(!badness); + res.word = (UWord)"trivial GExpr has no guards (!)"; + VG_(deleteXA)( results ); + return res; + } + + for (i = 0; i < nGuards; i++) { + mul = VG_(indexXA)( results, i ); + if (mul->b == False) + break; + } + + vg_assert(i >= 0 && i <= nGuards); + if (i < nGuards) { + /* at least one subexpression failed to produce a manifest constant. */ + vg_assert(badness); + res.word = (UWord)badness; + VG_(deleteXA)( results ); + return res; + } + + /* All the subexpressions produced a constant, but did they all produce + the same one? 
*/ + mul = VG_(indexXA)( results, 0 ); + tl_assert(mul->b == True); /* we just established that all exprs are ok */ + + for (i = 1; i < nGuards; i++) { + mul2 = VG_(indexXA)( results, i ); + tl_assert(mul2->b == True); + if (mul2->ul != mul->ul) { + res.word = (UWord)"trivial GExpr: subexpressions disagree"; + VG_(deleteXA)( results ); + return res; + } + } + + /* Well, we have success. All subexpressions evaluated, and + they all agree. Hurrah. */ + res.kind = GXR_Value; + res.word = (UWord)mul->ul; /* NB: narrowing from ULong */ + VG_(deleteXA)( results ); + return res; +} + + +void ML_(pp_GXResult) ( GXResult res ) +{ + switch (res.kind) { + case GXR_Failure: + VG_(printf)("GXR_Failure(%s)", (HChar*)res.word); break; + case GXR_Value: + VG_(printf)("GXR_Value(0x%lx)", res.word); break; + case GXR_RegNo: + VG_(printf)("GXR_RegNo(%lu)", res.word); break; + default: + VG_(printf)("GXR_???"); break; + } +} + + +void ML_(pp_GX) ( GExpr* gx ) { + Addr aMin, aMax; + UChar uc; + UShort nbytes; + UChar* p = &gx->payload[0]; + uc = *p++; + VG_(printf)("GX(%s){", uc == 0 ? 
"final" : "Breqd" ); + vg_assert(uc == 0 || uc == 1); + while (True) { + uc = *p++; + if (uc == 1) + break; /*isEnd*/ + vg_assert(uc == 0); + aMin = * (Addr*)p; p += sizeof(Addr); + aMax = * (Addr*)p; p += sizeof(Addr); + nbytes = * (UShort*)p; p += sizeof(UShort); + VG_(printf)("[%#lx,%#lx]=", aMin, aMax); + while (nbytes > 0) { + VG_(printf)("%02x", (UInt)*p++); + nbytes--; + } + if (*p == 0) + VG_(printf)(","); + } + VG_(printf)("}"); +} + + +/*--------------------------------------------------------------------*/ +/*--- end d3basics.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/debuginfo.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/debuginfo.c.svn-base new file mode 100644 index 0000000..b4e2e63 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/debuginfo.c.svn-base @@ -0,0 +1,3391 @@ + +/*--------------------------------------------------------------------*/ +/*--- Top level management of symbols and debugging information. ---*/ +/*--- debuginfo.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. 
+ + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +#include "pub_core_basics.h" +#include "pub_core_vki.h" +#include "pub_core_threadstate.h" +#include "pub_core_debuginfo.h" /* self */ +#include "pub_core_demangle.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcfile.h" +#include "pub_core_seqmatch.h" +#include "pub_core_options.h" +#include "pub_core_redir.h" // VG_(redir_notify_{new,delete}_SegInfo) +#include "pub_core_aspacemgr.h" +#include "pub_core_machine.h" // VG_PLAT_USES_PPCTOC +#include "pub_core_xarray.h" +#include "pub_core_oset.h" +#include "pub_core_stacktrace.h" // VG_(get_StackTrace) XXX: circular dependency + +#include "priv_misc.h" /* dinfo_zalloc/free */ +#include "priv_d3basics.h" /* ML_(pp_GX) */ +#include "priv_tytypes.h" +#include "priv_storage.h" +#include "priv_readdwarf.h" +#include "priv_readstabs.h" +#if defined(VGO_linux) +# include "priv_readelf.h" +# include "priv_readdwarf3.h" +# include "priv_readpdb.h" +#elif defined(VGO_aix5) +# include "pub_core_debuglog.h" +# include "pub_core_libcproc.h" +# include "pub_core_libcfile.h" +# include "priv_readxcoff.h" +#endif + + +/*------------------------------------------------------------*/ +/*--- The _svma / _avma / _image / _bias naming scheme ---*/ +/*------------------------------------------------------------*/ + +/* JRS 11 Jan 07: I find the different kinds of addresses involved in + debuginfo reading confusing. Recently I arrived at some + terminology which makes it clearer (to me, at least). 
There are 3 + kinds of address used in the debuginfo reading process: + + stated VMAs - the address where (eg) a .so says a symbol is, that + is, what it tells you if you consider the .so in + isolation + + actual VMAs - the address where (eg) said symbol really wound up + after the .so was mapped into memory + + image addresses - pointers into the copy of the .so (etc) + transiently mmaped aboard whilst we read its info + + Additionally I use the term 'bias' to denote the difference + between stated and actual VMAs for a given entity. + + This terminology is not used consistently, but a start has been + made. readelf.c and the call-frame info reader in readdwarf.c now + use it. Specifically, various variables and structure fields have + been annotated with _avma / _svma / _image / _bias. In places _img + is used instead of _image for the sake of brevity. +*/ + + +/*------------------------------------------------------------*/ +/*--- fwdses ---*/ +/*------------------------------------------------------------*/ + +static void cfsi_cache__invalidate ( void ); + + +/*------------------------------------------------------------*/ +/*--- Root structure ---*/ +/*------------------------------------------------------------*/ + +/* The root structure for the entire debug info system. It is a + linked list of DebugInfos. */ +static DebugInfo* debugInfo_list = NULL; + + +/* Find 'di' in the debugInfo_list and move it one step closer the the + front of the list, so as to make subsequent searches for it + cheaper. When used in a controlled way, makes a major improvement + in some DebugInfo-search-intensive situations, most notably stack + unwinding on amd64-linux. 
*/ +static void move_DebugInfo_one_step_forward ( DebugInfo* di ) +{ + DebugInfo *di0, *di1, *di2; + if (di == debugInfo_list) + return; /* already at head of list */ + vg_assert(di != NULL); + di0 = debugInfo_list; + di1 = NULL; + di2 = NULL; + while (True) { + if (di0 == NULL || di0 == di) break; + di2 = di1; + di1 = di0; + di0 = di0->next; + } + vg_assert(di0 == di); + if (di0 != NULL && di1 != NULL && di2 != NULL) { + DebugInfo* tmp; + /* di0 points to di, di1 to its predecessor, and di2 to di1's + predecessor. Swap di0 and di1, that is, move di0 one step + closer to the start of the list. */ + vg_assert(di2->next == di1); + vg_assert(di1->next == di0); + tmp = di0->next; + di2->next = di0; + di0->next = di1; + di1->next = tmp; + } + else + if (di0 != NULL && di1 != NULL && di2 == NULL) { + /* it's second in the list. */ + vg_assert(debugInfo_list == di1); + vg_assert(di1->next == di0); + di1->next = di0->next; + di0->next = di1; + debugInfo_list = di0; + } +} + + +/*------------------------------------------------------------*/ +/*--- Notification (acquire/discard) helpers ---*/ +/*------------------------------------------------------------*/ + +/* Gives out unique abstract handles for allocated DebugInfos. See + comment in priv_storage.h, declaration of struct _DebugInfo, for + details. */ +static ULong handle_counter = 1; + +/* Allocate and zero out a new DebugInfo record. */ +static +DebugInfo* alloc_DebugInfo( const UChar* filename, + const UChar* memname ) +{ + Bool traceme; + DebugInfo* di; + + vg_assert(filename); + + di = ML_(dinfo_zalloc)("di.debuginfo.aDI.1", sizeof(DebugInfo)); + di->handle = handle_counter++; + di->filename = ML_(dinfo_strdup)("di.debuginfo.aDI.2", filename); + di->memname = memname ? ML_(dinfo_strdup)("di.debuginfo.aDI.3", memname) + : NULL; + + /* Everything else -- pointers, sizes, arrays -- is zeroed by calloc. + Now set up the debugging-output flags. 
*/ + traceme + = VG_(string_match)( VG_(clo_trace_symtab_patt), filename ) + || (memname && VG_(string_match)( VG_(clo_trace_symtab_patt), + memname )); + if (traceme) { + di->trace_symtab = VG_(clo_trace_symtab); + di->trace_cfi = VG_(clo_trace_cfi); + di->ddump_syms = VG_(clo_debug_dump_syms); + di->ddump_line = VG_(clo_debug_dump_line); + di->ddump_frames = VG_(clo_debug_dump_frames); + } + + return di; +} + + +/* Free a DebugInfo, and also all the stuff hanging off it. */ +static void free_DebugInfo ( DebugInfo* di ) +{ + Word i, j, n; + struct strchunk *chunk, *next; + TyEnt* ent; + GExpr* gexpr; + + vg_assert(di != NULL); + if (di->filename) ML_(dinfo_free)(di->filename); + if (di->symtab) ML_(dinfo_free)(di->symtab); + if (di->loctab) ML_(dinfo_free)(di->loctab); + if (di->cfsi) ML_(dinfo_free)(di->cfsi); + if (di->cfsi_exprs) VG_(deleteXA)(di->cfsi_exprs); + if (di->fpo) ML_(dinfo_free)(di->fpo); + + for (chunk = di->strchunks; chunk != NULL; chunk = next) { + next = chunk->next; + ML_(dinfo_free)(chunk); + } + + /* Delete the two admin arrays. These lists exist primarily so + that we can visit each object exactly once when we need to + delete them. */ + if (di->admin_tyents) { + n = VG_(sizeXA)(di->admin_tyents); + for (i = 0; i < n; i++) { + ent = (TyEnt*)VG_(indexXA)(di->admin_tyents, i); + /* Dump anything hanging off this ent */ + ML_(TyEnt__make_EMPTY)(ent); + } + VG_(deleteXA)(di->admin_tyents); + di->admin_tyents = NULL; + } + + if (di->admin_gexprs) { + n = VG_(sizeXA)(di->admin_gexprs); + for (i = 0; i < n; i++) { + gexpr = *(GExpr**)VG_(indexXA)(di->admin_gexprs, i); + ML_(dinfo_free)(gexpr); + } + VG_(deleteXA)(di->admin_gexprs); + di->admin_gexprs = NULL; + } + + /* Dump the variable info. This is kinda complex: we must take + care not to free items which reside in either the admin lists + (as we have just freed them) or which reside in the DebugInfo's + string table. 
*/ + if (di->varinfo) { + for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) { + OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i); + if (!scope) continue; + /* iterate over all entries in 'scope' */ + VG_(OSetGen_ResetIter)(scope); + while (True) { + DiAddrRange* arange = VG_(OSetGen_Next)(scope); + if (!arange) break; + /* for each var in 'arange' */ + vg_assert(arange->vars); + for (j = 0; j < VG_(sizeXA)( arange->vars ); j++) { + DiVariable* var = (DiVariable*)VG_(indexXA)(arange->vars,j); + vg_assert(var); + /* Nothing to free in var: all the pointer fields refer + to stuff either on an admin list, or in + .strchunks */ + } + VG_(deleteXA)(arange->vars); + /* Don't free arange itself, as OSetGen_Destroy does + that */ + } + VG_(OSetGen_Destroy)(scope); + } + VG_(deleteXA)(di->varinfo); + } + + ML_(dinfo_free)(di); +} + + +/* 'si' is a member of debugInfo_list. Find it, remove it from the + list, notify m_redir that this has happened, and free all storage + reachable from it. +*/ +static void discard_DebugInfo ( DebugInfo* di ) +{ +# if defined(VGP_ppc32_aix5) + HChar* reason = "__unload"; +# elif defined(VGP_ppc64_aix5) + HChar* reason = "kunload64"; +# else + HChar* reason = "munmap"; +# endif + + DebugInfo** prev_next_ptr = &debugInfo_list; + DebugInfo* curr = debugInfo_list; + + while (curr) { + if (curr == di) { + /* Found it; remove from list and free it. */ + if (curr->have_dinfo + && (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir))) + VG_(message)(Vg_DebugMsg, + "Discarding syms at %#lx-%#lx in %s due to %s()", + di->text_avma, + di->text_avma + di->text_size, + curr->filename ? curr->filename : (UChar*)"???", + reason); + vg_assert(*prev_next_ptr == curr); + *prev_next_ptr = curr->next; + if (curr->have_dinfo) + VG_(redir_notify_delete_DebugInfo)( curr ); + free_DebugInfo(curr); + return; + } + prev_next_ptr = &curr->next; + curr = curr->next; + } + + /* Not found. 
*/ +} + + +/* Repeatedly scan debugInfo_list, looking for DebugInfos with text + AVMAs intersecting [start,start+length), and call discard_DebugInfo + to get rid of them. This modifies the list, hence the multiple + iterations. Returns True iff any such DebugInfos were found. +*/ +static Bool discard_syms_in_range ( Addr start, SizeT length ) +{ + Bool anyFound = False; + Bool found; + DebugInfo* curr; + + while (True) { + found = False; + + curr = debugInfo_list; + while (True) { + if (curr == NULL) + break; + if (curr->text_present + && curr->text_size > 0 + && (start+length - 1 < curr->text_avma + || curr->text_avma + curr->text_size - 1 < start)) { + /* no overlap */ + } else { + found = True; + break; + } + curr = curr->next; + } + + if (!found) break; + anyFound = True; + discard_DebugInfo( curr ); + } + + return anyFound; +} + + +/* Does [s1,+len1) overlap [s2,+len2) ? Note: does not handle + wraparound at the end of the address space -- just asserts in that + case. */ +static Bool ranges_overlap (Addr s1, SizeT len1, Addr s2, SizeT len2 ) +{ + Addr e1, e2; + if (len1 == 0 || len2 == 0) + return False; + e1 = s1 + len1 - 1; + e2 = s2 + len2 - 1; + /* Assert that we don't have wraparound. If we do it would imply + that file sections are getting mapped around the end of the + address space, which sounds unlikely. */ + vg_assert(s1 <= e1); + vg_assert(s2 <= e2); + if (e1 < s2 || e2 < s1) return False; + return True; +} + + +/* Do the basic rx_ and rw_ mappings of the two DebugInfos overlap in + any way? 
*/ +static Bool do_DebugInfos_overlap ( DebugInfo* di1, DebugInfo* di2 ) +{ + vg_assert(di1); + vg_assert(di2); + + if (di1->have_rx_map && di2->have_rx_map + && ranges_overlap(di1->rx_map_avma, di1->rx_map_size, + di2->rx_map_avma, di2->rx_map_size)) + return True; + + if (di1->have_rx_map && di2->have_rw_map + && ranges_overlap(di1->rx_map_avma, di1->rx_map_size, + di2->rw_map_avma, di2->rw_map_size)) + return True; + + if (di1->have_rw_map && di2->have_rx_map + && ranges_overlap(di1->rw_map_avma, di1->rw_map_size, + di2->rx_map_avma, di2->rx_map_size)) + return True; + + if (di1->have_rw_map && di2->have_rw_map + && ranges_overlap(di1->rw_map_avma, di1->rw_map_size, + di2->rw_map_avma, di2->rw_map_size)) + return True; + + return False; +} + + +/* Discard all elements of debugInfo_list whose .mark bit is set. +*/ +static void discard_marked_DebugInfos ( void ) +{ + DebugInfo* curr; + + while (True) { + + curr = debugInfo_list; + while (True) { + if (!curr) + break; + if (curr->mark) + break; + curr = curr->next; + } + + if (!curr) break; + discard_DebugInfo( curr ); + + } +} + + +/* Discard any elements of debugInfo_list which overlap with diRef. + Clearly diRef must have its rx_ and rw_ mapping information set to + something sane. */ +#if defined(VGO_aix5) +__attribute__((unused)) +#endif +static void discard_DebugInfos_which_overlap_with ( DebugInfo* diRef ) +{ + DebugInfo* di; + /* Mark all the DebugInfos in debugInfo_list that need to be + deleted. First, clear all the mark bits; then set them if they + overlap with siRef. Since siRef itself is in this list we at + least expect its own mark bit to be set. */ + for (di = debugInfo_list; di; di = di->next) { + di->mark = do_DebugInfos_overlap( di, diRef ); + if (di == diRef) { + vg_assert(di->mark); + di->mark = False; + } + } + discard_marked_DebugInfos(); +} + + +/* Find the existing DebugInfo for (memname,filename) or if not found, + create one. 
In the latter case memname and filename are strdup'd + into VG_AR_DINFO, and the new DebugInfo is added to + debugInfo_list. */ +static +DebugInfo* find_or_create_DebugInfo_for ( UChar* filename, UChar* memname ) +{ + DebugInfo* di; + vg_assert(filename); + for (di = debugInfo_list; di; di = di->next) { + vg_assert(di->filename); + if (0==VG_(strcmp)(di->filename, filename) + && ( (memname && di->memname) + ? 0==VG_(strcmp)(memname, di->memname) + : True )) + break; + } + if (!di) { + di = alloc_DebugInfo(filename, memname); + vg_assert(di); + di->next = debugInfo_list; + debugInfo_list = di; + } + return di; +} + + +/* Debuginfo reading for 'di' has just been successfully completed. + Check that the invariants stated in + "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in + priv_storage.h are observed. */ +static void check_CFSI_related_invariants ( DebugInfo* di ) +{ + DebugInfo* di2 = NULL; + vg_assert(di); + /* This fn isn't called until after debuginfo for this object has + been successfully read. And that shouldn't happen until we have + both a r-x and rw- mapping for the object. Hence: */ + vg_assert(di->have_rx_map); + vg_assert(di->have_rw_map); + /* degenerate case: r-x section is empty */ + if (di->rx_map_size == 0) { + vg_assert(di->cfsi == NULL); + return; + } + /* normal case: r-x section is nonempty */ + /* invariant (0) */ + vg_assert(di->rx_map_size > 0); + /* invariant (1) */ + for (di2 = debugInfo_list; di2; di2 = di2->next) { + if (di2 == di) + continue; + if (di2->rx_map_size == 0) + continue; + vg_assert(di->rx_map_avma + di->rx_map_size <= di2->rx_map_avma + || di2->rx_map_avma + di2->rx_map_size <= di->rx_map_avma); + } + di2 = NULL; + /* invariant (2) */ + if (di->cfsi) { + vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! 
*/ + vg_assert(di->cfsi_minavma >= di->rx_map_avma); + vg_assert(di->cfsi_maxavma < di->rx_map_avma + di->rx_map_size); + } + /* invariants (3) and (4) */ + if (di->cfsi) { + Word i; + vg_assert(di->cfsi_used > 0); + vg_assert(di->cfsi_size > 0); + for (i = 0; i < di->cfsi_used; i++) { + DiCfSI* cfsi = &di->cfsi[i]; + vg_assert(cfsi->len > 0); + vg_assert(cfsi->base >= di->cfsi_minavma); + vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma); + if (i > 0) { + DiCfSI* cfsip = &di->cfsi[i-1]; + vg_assert(cfsip->base + cfsip->len <= cfsi->base); + } + } + } else { + vg_assert(di->cfsi_used == 0); + vg_assert(di->cfsi_size == 0); + } +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +void VG_(di_initialise) ( void ) +{ + /* There's actually very little to do here, since everything + centers around the DebugInfos in debugInfo_list, they are + created and destroyed on demand, and each one is treated more or + less independently. */ + vg_assert(debugInfo_list == NULL); + + /* flush the CFI fast query cache. */ + cfsi_cache__invalidate(); +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +#if defined(VGO_linux) + +/* The debug info system is driven by notifications that a text + segment has been mapped in, or unmapped. When that happens it + tries to acquire/discard whatever info is available for the + corresponding object. This section contains the notification + handlers. */ + +/* Notify the debuginfo system about a new mapping. This is the way + new debug information gets loaded. 
If allow_SkFileV is True, it + will try load debug info if the mapping at 'a' belongs to Valgrind; + whereas normally (False) it will not do that. This allows us to + carefully control when the thing will read symbols from the + Valgrind executable itself. + + If a call to VG_(di_notify_mmap) causes debug info to be read, then + the returned ULong is an abstract handle which can later be used to + refer to the debuginfo read as a result of this specific mapping, + in later queries to m_debuginfo. In this case the handle value + will be one or above. If the returned value is zero, no debug info + was read. */ + +ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV ) +{ + NSegment const * seg; + HChar* filename; + Bool ok, is_rx_map, is_rw_map; + DebugInfo* di; + ULong di_handle; + SysRes fd; + Int nread; + HChar buf1k[1024]; + Bool debug = False; + SysRes statres; + struct vg_stat statbuf; + + /* In short, figure out if this mapping is of interest to us, and + if so, try to guess what ld.so is doing and when/if we should + read debug info. */ + seg = VG_(am_find_nsegment)(a); + vg_assert(seg); + + if (debug) + VG_(printf)("di_notify_mmap-1: %#lx-%#lx %c%c%c\n", + seg->start, seg->end, + seg->hasR ? 'r' : '-', + seg->hasW ? 'w' : '-',seg->hasX ? 'x' : '-' ); + + /* guaranteed by aspacemgr-linux.c, sane_NSegment() */ + vg_assert(seg->end > seg->start); + + /* Ignore non-file mappings */ + if ( ! (seg->kind == SkFileC + || (seg->kind == SkFileV && allow_SkFileV)) ) + return 0; + + /* If the file doesn't have a name, we're hosed. Give up. */ + filename = VG_(am_get_filename)( (NSegment*)seg ); + if (!filename) + return 0; + + if (debug) + VG_(printf)("di_notify_mmap-2: %s\n", filename); + + /* Only try to read debug information from regular files. */ + statres = VG_(stat)(filename, &statbuf); + + /* stat dereferences symlinks, so we don't expect it to succeed and + yet produce something that is a symlink. */ + vg_assert(statres.isError || ! 
VKI_S_ISLNK(statbuf.st_mode)); + + /* Don't let the stat call fail silently. Filter out some known + sources of noise before complaining, though. */ + if (statres.isError) { + DebugInfo fake_di; + Bool quiet = VG_(strstr)(filename, "/var/run/nscd/") != NULL; + if (!quiet && VG_(clo_verbosity) > 1) { + VG_(memset)(&fake_di, 0, sizeof(fake_di)); + fake_di.filename = filename; + ML_(symerr)(&fake_di, True, "failed to stat64/stat this file"); + } + return 0; + } + + /* Finally, the point of all this stattery: if it's not a regular file, + don't try to read debug info from it. */ + if (! VKI_S_ISREG(statbuf.st_mode)) + return 0; + + /* no uses of statbuf below here. */ + + /* Now we have to guess if this is a text-like mapping, a data-like + mapping, neither or both. The rules are: + + text if: x86-linux r and x + other-linux r and x and not w + + data if: x86-linux r and w + other-linux r and w and not x + + Background: On x86-linux, objects are typically mapped twice: + + 1b8fb000-1b8ff000 r-xp 00000000 08:02 4471477 vgpreload_memcheck.so + 1b8ff000-1b900000 rw-p 00004000 08:02 4471477 vgpreload_memcheck.so + + whereas ppc32-linux mysteriously does this: + + 118a6000-118ad000 r-xp 00000000 08:05 14209428 vgpreload_memcheck.so + 118ad000-118b6000 ---p 00007000 08:05 14209428 vgpreload_memcheck.so + 118b6000-118bd000 rwxp 00000000 08:05 14209428 vgpreload_memcheck.so + + The third mapping should not be considered to have executable + code in. Therefore a test which works for both is: r and x and + NOT w. Reading symbols from the rwx segment -- which overlaps + the r-x segment in the file -- causes the redirection mechanism + to redirect to addresses in that third segment, which is wrong + and causes crashes. + + JRS 28 Dec 05: unfortunately icc 8.1 on x86 has been seen to + produce executables with a single rwx segment rather than a + (r-x,rw-) pair. 
That means the rules have to be modified thusly: + + x86-linux: consider if r and x + all others: consider if r and x and not w + */ + is_rx_map = False; + is_rw_map = False; +# if defined(VGP_x86_linux) + is_rx_map = seg->hasR && seg->hasX; + is_rw_map = seg->hasR && seg->hasW; +# elif defined(VGP_amd64_linux) \ + || defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) + is_rx_map = seg->hasR && seg->hasX && !seg->hasW; + is_rw_map = seg->hasR && seg->hasW && !seg->hasX; +# else +# error "Unknown platform" +# endif + + if (debug) + VG_(printf)("di_notify_mmap-3: is_rx_map %d, is_rw_map %d\n", + (Int)is_rx_map, (Int)is_rw_map); + + /* If it is neither text-ish nor data-ish, we're not interested. */ + if (!(is_rx_map || is_rw_map)) + return 0; + + /* Peer at the first few bytes of the file, to see if it is an ELF */ + /* object file. Ignore the file if we do not have read permission. */ + VG_(memset)(buf1k, 0, sizeof(buf1k)); + fd = VG_(open)( filename, VKI_O_RDONLY, 0 ); + if (fd.isError) { + if (fd.err != VKI_EACCES) + { + DebugInfo fake_di; + VG_(memset)(&fake_di, 0, sizeof(fake_di)); + fake_di.filename = filename; + ML_(symerr)(&fake_di, True, "can't open file to inspect ELF header"); + } + return 0; + } + nread = VG_(read)( fd.res, buf1k, sizeof(buf1k) ); + VG_(close)( fd.res ); + + if (nread == 0) + return 0; + if (nread < 0) { + DebugInfo fake_di; + VG_(memset)(&fake_di, 0, sizeof(fake_di)); + fake_di.filename = filename; + ML_(symerr)(&fake_di, True, "can't read file to inspect ELF header"); + return 0; + } + vg_assert(nread > 0 && nread <= sizeof(buf1k) ); + + /* We're only interested in mappings of ELF object files. */ + if (!ML_(is_elf_object_file)( buf1k, (SizeT)nread )) + return 0; + + /* See if we have a DebugInfo for this filename. If not, + create one. */ + di = find_or_create_DebugInfo_for( filename, NULL/*membername*/ ); + vg_assert(di); + + if (is_rx_map) { + /* We have a text-like mapping. Note the details. 
*/ + if (!di->have_rx_map) { + di->have_rx_map = True; + di->rx_map_avma = a; + di->rx_map_size = seg->end + 1 - seg->start; + di->rx_map_foff = seg->offset; + } else { + /* FIXME: complain about a second text-like mapping */ + } + } + + if (is_rw_map) { + /* We have a data-like mapping. Note the details. */ + if (!di->have_rw_map) { + di->have_rw_map = True; + di->rw_map_avma = a; + di->rw_map_size = seg->end + 1 - seg->start; + di->rw_map_foff = seg->offset; + } else { + /* FIXME: complain about a second data-like mapping */ + } + } + + /* If we don't have an rx and rw mapping, or if we already have + debuginfo for this mapping for whatever reason, go no + further. */ + if ( ! (di->have_rx_map && di->have_rw_map && !di->have_dinfo) ) + return 0; + + /* Ok, so, finally, let's try to read the debuginfo. */ + vg_assert(di->filename); + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ start ELF OBJECT " + "------------------------------\n"); + TRACE_SYMTAB("------ name = %s\n", di->filename); + TRACE_SYMTAB("\n"); + + /* We're going to read symbols and debug info for the avma + ranges [rx_map_avma, +rx_map_size) and [rw_map_avma, + +rw_map_size). First get rid of any other DebugInfos which + overlap either of those ranges (to avoid total confusion). */ + discard_DebugInfos_which_overlap_with( di ); + + /* .. and acquire new info. */ + ok = ML_(read_elf_debug_info)( di ); + + if (ok) { + + TRACE_SYMTAB("\n------ Canonicalising the " + "acquired info ------\n"); + /* invalidate the CFI unwind cache. */ + cfsi_cache__invalidate(); + /* prepare read data for use */ + ML_(canonicaliseTables)( di ); + /* notify m_redir about it */ + TRACE_SYMTAB("\n------ Notifying m_redir ------\n"); + VG_(redir_notify_new_DebugInfo)( di ); + /* Note that we succeeded */ + di->have_dinfo = True; + tl_assert(di->handle > 0); + di_handle = di->handle; + /* Check invariants listed in + Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in + priv_storage.h. 
*/ + check_CFSI_related_invariants(di); + + } else { + TRACE_SYMTAB("\n------ ELF reading failed ------\n"); + /* Something went wrong (eg. bad ELF file). Should we delete + this DebugInfo? No - it contains info on the rw/rx + mappings, at least. */ + di_handle = 0; + vg_assert(di->have_dinfo == False); + } + + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ name = %s\n", di->filename); + TRACE_SYMTAB("------ end ELF OBJECT " + "------------------------------\n"); + TRACE_SYMTAB("\n"); + + return di_handle; +} + + +/* Unmap is simpler - throw away any SegInfos intersecting + [a, a+len). */ +void VG_(di_notify_munmap)( Addr a, SizeT len ) +{ + Bool anyFound; + if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len); + anyFound = discard_syms_in_range(a, len); + if (anyFound) + cfsi_cache__invalidate(); +} + + +/* Uh, this doesn't do anything at all. IIRC glibc (or ld.so, I don't + remember) does a bunch of mprotects on itself, and if we follow + through here, it causes the debug info for that object to get + discarded. */ +void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot ) +{ + Bool exe_ok = toBool(prot & VKI_PROT_EXEC); +# if defined(VGP_x86_linux) + exe_ok = exe_ok || toBool(prot & VKI_PROT_READ); +# endif + if (0 && !exe_ok) { + Bool anyFound = discard_syms_in_range(a, len); + if (anyFound) + cfsi_cache__invalidate(); + } +} + +/*--------- PDB (windows debug info) reading --------- */ + +/* this should really return ULong, as per VG_(di_notify_mmap). 
*/ +void VG_(di_notify_pdb_debuginfo)( Int fd_obj, Addr avma_obj, + SizeT total_size, + PtrdiffT unknown_purpose__reloc ) +{ + Int r, sz_exename; + ULong obj_mtime, pdb_mtime; + Char exename[VKI_PATH_MAX]; + Char* pdbname = NULL; + Char* dot; + SysRes sres; + Int fd_pdbimage; + SizeT n_pdbimage; + struct vg_stat stat_buf; + + if (VG_(clo_verbosity) > 0) { + VG_(message)(Vg_UserMsg, ""); + VG_(message)(Vg_UserMsg, + "LOAD_PDB_DEBUGINFO(fd=%d, avma=%#lx, total_size=%lu, " + "uu_reloc=%#lx)", + fd_obj, avma_obj, total_size, unknown_purpose__reloc + ); + } + + /* 'fd' refers to the .exe/.dll we're dealing with. Get its modification + time into obj_mtime. */ + r = VG_(fstat)(fd_obj, &stat_buf); + if (r == -1) + goto out; /* stat failed ?! */ + vg_assert(r == 0); + obj_mtime = stat_buf.st_mtime; + + /* and get its name into exename[]. */ + vg_assert(VKI_PATH_MAX > 100); /* to ensure /proc/self/fd/%d is safe */ + VG_(memset)(exename, 0, sizeof(exename)); + VG_(sprintf)(exename, "/proc/self/fd/%d", fd_obj); + /* convert exename from a symlink to real name .. overwrites the + old contents of the buffer. Ick. */ + sz_exename = VG_(readlink)(exename, exename, sizeof(exename)-2 ); + if (sz_exename == -1) + goto out; /* readlink failed ?! */ + vg_assert(sz_exename >= 0 && sz_exename < sizeof(exename)); + vg_assert(exename[sizeof(exename)-1] == 0); + + if (VG_(clo_verbosity) > 0) { + VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: objname: %s", exename); + } + + /* Try to find a matching PDB file from which to read debuginfo. + Windows PE files have symbol tables and line number information, + but MSVC doesn't seem to use them. */ + /* Why +5 ? Because in the worst case, we could find a dot as the + last character of pdbname, and we'd then put "pdb" right after + it, hence extending it a bit. 
*/ + pdbname = ML_(dinfo_zalloc)("di.debuginfo.lpd1", sz_exename+5); + VG_(strcpy)(pdbname, exename); + vg_assert(pdbname[sz_exename+5-1] == 0); + dot = VG_(strrchr)(pdbname, '.'); + if (!dot) + goto out; /* there's no dot in the exe's name ?! */ + if (dot[1] == 0) + goto out; /* hmm, path ends in "." */ + + if ('A' <= dot[1] && dot[1] <= 'Z') + VG_(strcpy)(dot, ".PDB"); + else + VG_(strcpy)(dot, ".pdb"); + + vg_assert(pdbname[sz_exename+5-1] == 0); + + /* See if we can find it, and check it's in-dateness. */ + sres = VG_(stat)(pdbname, &stat_buf); + if (sres.isError) { + VG_(message)(Vg_UserMsg, "Warning: Missing or un-stat-able %s", + pdbname); + if (VG_(clo_verbosity) > 0) + VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: missing: %s", pdbname); + goto out; + } + pdb_mtime = stat_buf.st_mtime; + if (pdb_mtime < obj_mtime ) { + /* PDB file is older than PE file - ignore it or we will either + (a) print wrong stack traces or more likely (b) crash. */ + VG_(message)(Vg_UserMsg, "Warning: Ignoring %s since it is older than %s", + pdbname, exename); + goto out; + } + + sres = VG_(open)(pdbname, VKI_O_RDONLY, 0); + if (sres.isError) { + VG_(message)(Vg_UserMsg, "Warning: Can't open %s", pdbname); + goto out; + } + + /* Looks promising; go on to try and read stuff from it. */ + fd_pdbimage = sres.res; + n_pdbimage = stat_buf.st_size; + sres = VG_(am_mmap_file_float_valgrind)( n_pdbimage, VKI_PROT_READ, + fd_pdbimage, 0 ); + if (sres.isError) { + VG_(close)(fd_pdbimage); + goto out; + } + + if (VG_(clo_verbosity) > 0) + VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: pdbname: %s", pdbname); + + /* play safe; always invalidate the CFI cache. I don't know if + this is necessary, but anyway .. 
*/ + cfsi_cache__invalidate(); + /* dump old info for this range, if any */ + discard_syms_in_range( avma_obj, total_size ); + + { void* pdbimage = (void*)sres.res; + DebugInfo* di = find_or_create_DebugInfo_for(exename, NULL/*membername*/ ); + + /* this di must be new, since we just nuked any old stuff in the range */ + vg_assert(di && !di->have_rx_map && !di->have_rw_map); + vg_assert(!di->have_dinfo); + + /* don't set up any of the di-> fields; let + ML_(read_pdb_debug_info) do it. */ + ML_(read_pdb_debug_info)( di, avma_obj, unknown_purpose__reloc, + pdbimage, n_pdbimage, pdbname, pdb_mtime ); + // JRS fixme: take notice of return value from read_pdb_debug_info, + // and handle failure + vg_assert(di->have_dinfo); // fails if PDB read failed + VG_(am_munmap_valgrind)( (Addr)pdbimage, n_pdbimage ); + VG_(close)(fd_pdbimage); + } + + out: + if (pdbname) ML_(dinfo_free)(pdbname); +} + +#endif /* defined(VGO_linux) */ + + +/*-------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (AIX5) ---*/ +/*--- ---*/ +/*-------------------------------------------------------------*/ + +#if defined(VGO_aix5) + +/* The supplied parameters describe a code segment and its associated + data segment, that have recently been mapped in -- so we need to + read debug info for it -- or conversely, have recently been dumped, + in which case the relevant debug info has to be unloaded. */ + +ULong VG_(di_aix5_notify_segchange)( + Addr code_start, + Word code_len, + Addr data_start, + Word data_len, + UChar* file_name, + UChar* mem_name, + Bool is_mainexe, + Bool acquire ) +{ + ULong hdl = 0; + + /* play safe; always invalidate the CFI cache. Not + that it should be used on AIX, but still .. 
*/ + cfsi_cache__invalidate(); + + if (acquire) { + + Bool ok; + DebugInfo* di; + di = find_or_create_DebugInfo_for( file_name, mem_name ); + vg_assert(di); + + if (code_len > 0) { + di->text_present = True; + di->text_svma = 0; /* don't know yet */ + di->text_bias = 0; /* don't know yet */ + di->text_avma = code_start; + di->text_size = code_len; + } + if (data_len > 0) { + di->data_present = True; + di->data_svma = 0; /* don't know yet */ + di->data_bias = 0; /* don't know yet */ + di->data_avma = data_start; + di->data_size = data_len; + } + + /* These need to be filled in in order to keep various + assertions in storage.c happy. In particular see + "Comment_Regarding_Text_Range_Checks" in that file. */ + di->have_rx_map = True; + di->rx_map_avma = code_start; + di->rx_map_size = code_len; + di->have_rw_map = True; + di->rw_map_avma = data_start; + di->rw_map_size = data_len; + + ok = ML_(read_xcoff_debug_info) ( di, is_mainexe ); + + if (ok) { + /* prepare read data for use */ + ML_(canonicaliseTables)( di ); + /* notify m_redir about it */ + VG_(redir_notify_new_DebugInfo)( di ); + /* Note that we succeeded */ + di->have_dinfo = True; + hdl = di->handle; + vg_assert(hdl > 0); + /* Check invariants listed in + Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in + priv_storage.h. */ + check_CFSI_related_invariants(di); + } else { + /* Something went wrong (eg. bad XCOFF file). */ + discard_DebugInfo( di ); + di = NULL; + } + + } else { + + /* Dump all the debugInfos whose text segments intersect + code_start/code_len. */ + /* CFI cache is always invalidated at start of this routine. + Hence it's safe to ignore the return value of + discard_syms_in_range. 
*/ + if (code_len > 0) + (void)discard_syms_in_range( code_start, code_len ); + + } + + return hdl; +} + + +#endif /* defined(VGO_aix5) */ + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: QUERYING EXISTING DEBUG INFO ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +void VG_(di_discard_ALL_debuginfo)( void ) +{ + DebugInfo *di, *di2; + di = debugInfo_list; + while (di) { + di2 = di->next; + VG_(printf)("XXX rm %p\n", di); + free_DebugInfo( di ); + di = di2; + } +} + + +/*------------------------------------------------------------*/ +/*--- Use of symbol table & location info to create ---*/ +/*--- plausible-looking stack dumps. ---*/ +/*------------------------------------------------------------*/ + +/* Search all symtabs that we know about to locate ptr. If found, set + *pdi to the relevant DebugInfo, and *symno to the symtab entry + *number within that. If not found, *psi is set to NULL. + If findText==True, only text symbols are searched for. + If findText==False, only data symbols are searched for. 
+*/ +static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi, + /*OUT*/Word* symno, + Bool match_anywhere_in_sym, + Bool findText ) +{ + Word sno; + DebugInfo* di; + Bool inRange; + + for (di = debugInfo_list; di != NULL; di = di->next) { + + if (findText) { + inRange = di->text_present + && di->text_size > 0 + && di->text_avma <= ptr + && ptr < di->text_avma + di->text_size; + } else { + inRange = (di->data_present + && di->data_size > 0 + && di->data_avma <= ptr + && ptr < di->data_avma + di->data_size) + || + (di->sdata_present + && di->sdata_size > 0 + && di->sdata_avma <= ptr + && ptr < di->sdata_avma + di->sdata_size) + || + (di->bss_present + && di->bss_size > 0 + && di->bss_avma <= ptr + && ptr < di->bss_avma + di->bss_size) + || + (di->sbss_present + && di->sbss_size > 0 + && di->sbss_avma <= ptr + && ptr < di->sbss_avma + di->sbss_size) + || + (di->rodata_present + && di->rodata_size > 0 + && di->rodata_avma <= ptr + && ptr < di->rodata_avma + di->rodata_size); + } + + if (!inRange) continue; + + sno = ML_(search_one_symtab) ( + di, ptr, match_anywhere_in_sym, findText ); + if (sno == -1) goto not_found; + *symno = sno; + *pdi = di; + return; + + } + not_found: + *pdi = NULL; +} + + +/* Search all loctabs that we know about to locate ptr. If found, set + *pdi to the relevant DebugInfo, and *locno to the loctab entry + *number within that. If not found, *pdi is set to NULL. */ +static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi, + /*OUT*/Word* locno ) +{ + Word lno; + DebugInfo* di; + for (di = debugInfo_list; di != NULL; di = di->next) { + if (di->text_present + && di->text_size > 0 + && di->text_avma <= ptr + && ptr < di->text_avma + di->text_size) { + lno = ML_(search_one_loctab) ( di, ptr ); + if (lno == -1) goto not_found; + *locno = lno; + *pdi = di; + return; + } + } + not_found: + *pdi = NULL; +} + + +/* The whole point of this whole big deal: map a code address to a + plausible symbol name. 
Returns False if no idea; otherwise True. + Caller supplies buf and nbuf. If do_cxx_demangling is False, don't do + C++ demangling, regardless of VG_(clo_demangle) -- probably because the + call has come from VG_(get_fnname_raw)(). findText + indicates whether we're looking for a text symbol or a data symbol + -- caller must choose one kind or the other. */ +static +Bool get_sym_name ( Bool do_cxx_demangling, Bool do_z_demangling, + Bool do_below_main_renaming, + Addr a, Char* buf, Int nbuf, + Bool match_anywhere_in_sym, Bool show_offset, + Bool findText, /*OUT*/PtrdiffT* offsetP ) +{ + DebugInfo* di; + Word sno; + PtrdiffT offset; + + search_all_symtabs ( a, &di, &sno, match_anywhere_in_sym, findText ); + if (di == NULL) + return False; + + VG_(demangle) ( do_cxx_demangling, do_z_demangling, + di->symtab[sno].name, buf, nbuf ); + + /* Do the below-main hack */ + // To reduce the endless nuisance of multiple different names + // for "the frame below main()" screwing up the testsuite, change all + // known incarnations of said into a single name, "(below main)", if + // --show-below-main=yes. + if ( do_below_main_renaming && ! VG_(clo_show_below_main) && + Vg_FnNameBelowMain == VG_(get_fnname_kind)(buf) ) + { + VG_(strncpy_safely)(buf, "(below main)", nbuf); + } + offset = a - di->symtab[sno].addr; + if (offsetP) *offsetP = offset; + + if (show_offset && offset != 0) { + Char buf2[12]; + Char* symend = buf + VG_(strlen)(buf); + Char* end = buf + nbuf; + Int len; + + len = VG_(sprintf)(buf2, "%c%ld", + offset < 0 ? '-' : '+', + offset < 0 ? -offset : offset); + vg_assert(len < (Int)sizeof(buf2)); + + if (len < (end - symend)) { + Char *cp = buf2; + VG_(memcpy)(symend, cp, len+1); + } + } + + buf[nbuf-1] = 0; /* paranoia */ + + return True; +} + +/* ppc64-linux only: find the TOC pointer (R2 value) that should be in + force at the entry point address of the function containing + guest_code_addr. Returns 0 if not known. 
*/ +Addr VG_(get_tocptr) ( Addr guest_code_addr ) +{ + DebugInfo* si; + Word sno; + search_all_symtabs ( guest_code_addr, + &si, &sno, + True/*match_anywhere_in_fun*/, + True/*consider text symbols only*/ ); + if (si == NULL) + return 0; + else + return si->symtab[sno].tocptr; +} + +/* This is available to tools... always demangle C++ names, + match anywhere in function, but don't show offsets. */ +Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf ) +{ + return get_sym_name ( /*C++-demangle*/True, /*Z-demangle*/True, + /*below-main-renaming*/True, + a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/False, + /*text syms only*/True, + /*offsetP*/NULL ); +} + +/* This is available to tools... always demangle C++ names, + match anywhere in function, and show offset if nonzero. */ +Bool VG_(get_fnname_w_offset) ( Addr a, Char* buf, Int nbuf ) +{ + return get_sym_name ( /*C++-demangle*/True, /*Z-demangle*/True, + /*below-main-renaming*/True, + a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/True, + /*text syms only*/True, + /*offsetP*/NULL ); +} + +/* This is available to tools... always demangle C++ names, + only succeed if 'a' matches first instruction of function, + and don't show offsets. */ +Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf ) +{ + return get_sym_name ( /*C++-demangle*/True, /*Z-demangle*/True, + /*below-main-renaming*/True, + a, buf, nbuf, + /*match_anywhere_in_fun*/False, + /*show offset?*/False, + /*text syms only*/True, + /*offsetP*/NULL ); +} + +/* This is only available to core... don't C++-demangle, don't Z-demangle, + don't rename below-main, match anywhere in function, and don't show + offsets. 
*/ +Bool VG_(get_fnname_raw) ( Addr a, Char* buf, Int nbuf ) +{ + return get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/False, + /*below-main-renaming*/False, + a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/False, + /*text syms only*/True, + /*offsetP*/NULL ); +} + +/* This is only available to core... don't demangle C++ names, but do + do Z-demangling and below-main-renaming, match anywhere in function, and + don't show offsets. */ +Bool VG_(get_fnname_no_cxx_demangle) ( Addr a, Char* buf, Int nbuf ) +{ + return get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/True, + /*below-main-renaming*/True, + a, buf, nbuf, + /*match_anywhere_in_fun*/True, + /*show offset?*/False, + /*text syms only*/True, + /*offsetP*/NULL ); +} + +Vg_FnNameKind VG_(get_fnname_kind) ( Char* name ) +{ + if (VG_STREQ("main", name)) { + return Vg_FnNameMain; + + } else if ( +#if defined(VGO_linux) + VG_STREQ("__libc_start_main", name) || // glibc glibness + VG_STREQ("generic_start_main", name) || // Yellow Dog doggedness +#elif defined(VGO_aix5) + VG_STREQ("__start", name) || // AIX aches +#else +# error Unknown OS +#endif + 0) { + return Vg_FnNameBelowMain; + + } else { + return Vg_FnNameNormal; + } +} + +Vg_FnNameKind VG_(get_fnname_kind_from_IP) ( Addr ip ) +{ + // We don't need a big buffer; all the special names are small. + #define BUFLEN 50 + Char buf[50]; + + // We don't demangle, because it's faster not to, and the special names + // we're looking for won't be demangled. + if (VG_(get_fnname_raw) ( ip, buf, BUFLEN )) { + buf[BUFLEN-1] = '\0'; // paranoia + return VG_(get_fnname_kind)(buf); + } else { + return Vg_FnNameNormal; // Don't know the name, treat it as normal. + } +} + +/* Looks up data_addr in the collection of data symbols, and if found + puts its name (or as much as will fit) into dname[0 .. n_dname-1], + which is guaranteed to be zero terminated. Also data_addr's offset + from the symbol start is put into *offset. 
*/ +Bool VG_(get_datasym_and_offset)( Addr data_addr, + /*OUT*/Char* dname, Int n_dname, + /*OUT*/PtrdiffT* offset ) +{ + Bool ok; + vg_assert(n_dname > 1); + ok = get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/False, + /*below-main-renaming*/False, + data_addr, dname, n_dname, + /*match_anywhere_in_sym*/True, + /*show offset?*/False, + /*data syms only please*/False, + offset ); + if (!ok) + return False; + dname[n_dname-1] = 0; + return True; +} + +/* Map a code address to the name of a shared object file or the + executable. Returns False if no idea; otherwise True. Doesn't + require debug info. Caller supplies buf and nbuf. */ +Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf ) +{ + Int used; + DebugInfo* di; + const NSegment *seg; + HChar* filename; + vg_assert(nbuf > 0); + /* Look in the debugInfo_list to find the name. In most cases we + expect this to produce a result. */ + for (di = debugInfo_list; di != NULL; di = di->next) { + if (di->text_present + && di->text_size > 0 + && di->text_avma <= a + && a < di->text_avma + di->text_size) { + VG_(strncpy_safely)(buf, di->filename, nbuf); + if (di->memname) { + used = VG_(strlen)(buf); + if (used < nbuf) + VG_(strncpy_safely)(&buf[used], "(", nbuf-used); + used = VG_(strlen)(buf); + if (used < nbuf) + VG_(strncpy_safely)(&buf[used], di->memname, nbuf-used); + used = VG_(strlen)(buf); + if (used < nbuf) + VG_(strncpy_safely)(&buf[used], ")", nbuf-used); + } + buf[nbuf-1] = 0; + return True; + } + } + /* Last-ditch fallback position: if we don't find the address in + the debugInfo_list, ask the address space manager whether it + knows the name of the file associated with this mapping. This + allows us to print the names of exe/dll files in the stack trace + when running programs under wine. 
*/ + if ( (seg = VG_(am_find_nsegment(a))) != NULL + && (filename = VG_(am_get_filename)(seg)) != NULL ) { + VG_(strncpy_safely)(buf, filename, nbuf); + return True; + } + return False; +} + +/* Map a code address to its DebugInfo. Returns NULL if not found. Doesn't + require debug info. */ +DebugInfo* VG_(find_seginfo) ( Addr a ) +{ + DebugInfo* di; + for (di = debugInfo_list; di != NULL; di = di->next) { + if (di->text_present + && di->text_size > 0 + && di->text_avma <= a + && a < di->text_avma + di->text_size) { + return di; + } + } + return NULL; +} + +/* Map a code address to a filename. Returns True if successful. */ +Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename ) +{ + DebugInfo* si; + Word locno; + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename); + return True; +} + +/* Map a code address to a line number. Returns True if successful. */ +Bool VG_(get_linenum)( Addr a, UInt* lineno ) +{ + DebugInfo* si; + Word locno; + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) + return False; + *lineno = si->loctab[locno].lineno; + + return True; +} + +/* Map a code address to a filename/line number/dir name info. + See prototype for detailed description of behaviour. +*/ +Bool VG_(get_filename_linenum) ( Addr a, + /*OUT*/Char* filename, Int n_filename, + /*OUT*/Char* dirname, Int n_dirname, + /*OUT*/Bool* dirname_available, + /*OUT*/UInt* lineno ) +{ + DebugInfo* si; + Word locno; + + vg_assert( (dirname == NULL && dirname_available == NULL) + || + (dirname != NULL && dirname_available != NULL) ); + + search_all_loctabs ( a, &si, &locno ); + if (si == NULL) { + if (dirname_available) { + *dirname_available = False; + *dirname = 0; + } + return False; + } + + VG_(strncpy_safely)(filename, si->loctab[locno].filename, n_filename); + *lineno = si->loctab[locno].lineno; + + if (dirname) { + /* caller wants directory info too .. 
*/ + vg_assert(n_dirname > 0); + if (si->loctab[locno].dirname) { + /* .. and we have some */ + *dirname_available = True; + VG_(strncpy_safely)(dirname, si->loctab[locno].dirname, + n_dirname); + } else { + /* .. but we don't have any */ + *dirname_available = False; + *dirname = 0; + } + } + + return True; +} + + +/* Map a function name to its entry point and toc pointer. Is done by + sequential search of all symbol tables, so is very slow. To + mitigate the worst performance effects, you may specify a soname + pattern, and only objects matching that pattern are searched. + Therefore specify "*" to search all the objects. On TOC-afflicted + platforms, a symbol is deemed to be found only if it has a nonzero + TOC pointer. */ +Bool VG_(lookup_symbol_SLOW)(UChar* sopatt, UChar* name, + Addr* pEnt, Addr* pToc) +{ + Bool require_pToc = False; + Int i; + DebugInfo* si; + Bool debug = False; +# if defined(VG_PLAT_USES_PPCTOC) + require_pToc = True; +# endif + for (si = debugInfo_list; si; si = si->next) { + if (debug) + VG_(printf)("lookup_symbol_SLOW: considering %s\n", si->soname); + if (!VG_(string_match)(sopatt, si->soname)) { + if (debug) + VG_(printf)(" ... skip\n"); + continue; + } + for (i = 0; i < si->symtab_used; i++) { + if (0==VG_(strcmp)(name, si->symtab[i].name) + && (require_pToc ? si->symtab[i].tocptr : True)) { + *pEnt = si->symtab[i].addr; + *pToc = si->symtab[i].tocptr; + return True; + } + } + } + return False; +} + + +/* VG_(describe_IP): print into buf info on code address, function + name and filename. */ + +/* Copy str into buf starting at n, but not going past buf[n_buf-1] + and always ensuring that buf is zero-terminated. 
*/ + +static Int putStr ( Int n, Int n_buf, Char* buf, Char* str ) +{ + vg_assert(n_buf > 0); + vg_assert(n >= 0 && n < n_buf); + for (; n < n_buf-1 && *str != 0; n++,str++) + buf[n] = *str; + vg_assert(n >= 0 && n < n_buf); + buf[n] = '\0'; + return n; +} + +/* Same as putStr, but escaping chars for XML output, and + also not adding more than count chars to n_buf. */ + +static Int putStrEsc ( Int n, Int n_buf, Int count, Char* buf, Char* str ) +{ + Char alt[2]; + vg_assert(n_buf > 0); + vg_assert(count >= 0 && count < n_buf); + vg_assert(n >= 0 && n < n_buf); + for (; *str != 0; str++) { + vg_assert(count >= 0); + if (count <= 0) + goto done; + switch (*str) { + case '&': + if (count < 5) goto done; + n = putStr( n, n_buf, buf, "&"); + count -= 5; + break; + case '<': + if (count < 4) goto done; + n = putStr( n, n_buf, buf, "<"); + count -= 4; + break; + case '>': + if (count < 4) goto done; + n = putStr( n, n_buf, buf, ">"); + count -= 4; + break; + default: + if (count < 1) goto done; + alt[0] = *str; + alt[1] = 0; + n = putStr( n, n_buf, buf, alt ); + count -= 1; + break; + } + } + done: + vg_assert(count >= 0); /* should not go -ve in loop */ + vg_assert(n >= 0 && n < n_buf); + return n; +} + +Char* VG_(describe_IP)(Addr eip, Char* buf, Int n_buf) +{ +# define APPEND(_str) \ + n = putStr(n, n_buf, buf, _str) +# define APPEND_ESC(_count,_str) \ + n = putStrEsc(n, n_buf, (_count), buf, (_str)) +# define BUF_LEN 4096 + + UInt lineno; + UChar ibuf[50]; + Int n = 0; + static UChar buf_fn[BUF_LEN]; + static UChar buf_obj[BUF_LEN]; + static UChar buf_srcloc[BUF_LEN]; + static UChar buf_dirname[BUF_LEN]; + Bool know_dirinfo = False; + Bool know_fnname = VG_(clo_sym_offsets) + ? 
VG_(get_fnname_w_offset) (eip, buf_fn, BUF_LEN) + : VG_(get_fnname) (eip, buf_fn, BUF_LEN); + Bool know_objname = VG_(get_objname)(eip, buf_obj, BUF_LEN); + Bool know_srcloc = VG_(get_filename_linenum)( + eip, + buf_srcloc, BUF_LEN, + buf_dirname, BUF_LEN, &know_dirinfo, + &lineno + ); + if (VG_(clo_xml)) { + + Bool human_readable = True; + HChar* maybe_newline = human_readable ? "\n " : ""; + HChar* maybe_newline2 = human_readable ? "\n " : ""; + + /* Print in XML format, dumping in as much info as we know. + Ensure all tags are balanced even if the individual strings + are too long. Allocate 1/10 of BUF_LEN to the object name, + 6/10s to the function name, 1/10 to the directory name and + 1/10 to the file name, leaving 1/10 for all the fixed-length + stuff. */ + APPEND("<frame>"); + VG_(sprintf)(ibuf,"<ip>0x%llX</ip>", (ULong)eip); + APPEND(maybe_newline); + APPEND(ibuf); + if (know_objname) { + APPEND(maybe_newline); + APPEND("<obj>"); + APPEND_ESC(1*BUF_LEN/10, buf_obj); + APPEND("</obj>"); + } + if (know_fnname) { + APPEND(maybe_newline); + APPEND("<fn>"); + APPEND_ESC(6*BUF_LEN/10, buf_fn); + APPEND("</fn>"); + } + if (know_srcloc) { + if (know_dirinfo) { + APPEND(maybe_newline); + APPEND("<dir>"); + APPEND_ESC(1*BUF_LEN/10, buf_dirname); + APPEND("</dir>"); + } + APPEND(maybe_newline); + APPEND("<file>"); + APPEND_ESC(1*BUF_LEN/10, buf_srcloc); + APPEND("</file>"); + APPEND(maybe_newline); + APPEND("<line>"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND("</line>"); + } + APPEND(maybe_newline2); + APPEND("</frame>"); + + } else { + + /* Print for humans to read */ + // + // Possible forms: + // + // 0x80483BF: really (a.c:20) + // 0x80483BF: really (in /foo/a.out) + // 0x80483BF: really (in ???) + // 0x80483BF: ??? (in /foo/a.out) + // 0x80483BF: ??? (a.c:20) + // 0x80483BF: ??? 
+ // + VG_(sprintf)(ibuf,"0x%llX: ", (ULong)eip); + APPEND(ibuf); + if (know_fnname) { + APPEND(buf_fn); + } else { + APPEND("???"); + } + if (know_srcloc) { + APPEND(" ("); + APPEND(buf_srcloc); + APPEND(":"); + VG_(sprintf)(ibuf,"%d",lineno); + APPEND(ibuf); + APPEND(")"); + } else if (know_objname) { + APPEND(" (in "); + APPEND(buf_obj); + APPEND(")"); + } else if (know_fnname) { + // Nb: do this in two steps because "??)" is a trigraph! + APPEND(" (in ???"); + APPEND(")"); + } + + } + return buf; + +# undef APPEND +# undef APPEND_ESC +# undef BUF_LEN +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: FOR UNWINDING THE STACK USING ---*/ +/*--- DWARF3 .eh_frame INFO ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +/* Gather up all the constant pieces of info needed to evaluate + a CfiExpr into one convenient struct. */ +typedef + struct { + Addr ipHere; + Addr spHere; + Addr fpHere; + Addr min_accessible; + Addr max_accessible; + } + CfiExprEvalContext; + +/* Evaluate the CfiExpr rooted at ix in exprs given the context eec. + *ok is set to False on failure, but not to True on success. The + caller must set it to True before calling. 
*/
/* Returns the value of the expression, or 0 with *ok == False when a
   sub-expression failed or a Cex_Deref would read outside
   [eec->min_accessible, eec->max_accessible].  An unknown tag,
   operator or register prints the expression and asserts. */
static
UWord evalCfiExpr ( XArray* exprs, Int ix,
                    CfiExprEvalContext* eec, Bool* ok )
{
   UWord wL, wR;
   Addr  a;
   CfiExpr* e = VG_(indexXA)( exprs, ix );
   switch (e->tag) {
      case Cex_Binop:
         /* evaluate both operands, giving up as soon as one fails */
         wL = evalCfiExpr( exprs, e->Cex.Binop.ixL, eec, ok );
         if (!(*ok)) return 0;
         wR = evalCfiExpr( exprs, e->Cex.Binop.ixR, eec, ok );
         if (!(*ok)) return 0;
         switch (e->Cex.Binop.op) {
            case Cop_Add: return wL + wR;
            case Cop_Sub: return wL - wR;
            case Cop_And: return wL & wR;
            case Cop_Mul: return wL * wR;
            default: goto unhandled;
         }
         /*NOTREACHED*/
      case Cex_CfiReg:
         switch (e->Cex.CfiReg.reg) {
            case Creg_IP: return (Addr)eec->ipHere;
            case Creg_SP: return (Addr)eec->spHere;
            case Creg_FP: return (Addr)eec->fpHere;
            default: goto unhandled;
         }
         /*NOTREACHED*/
      case Cex_Const:
         return e->Cex.Const.con;
      case Cex_Deref:
         a = evalCfiExpr( exprs, e->Cex.Deref.ixAddr, eec, ok );
         if (!(*ok)) return 0;
         /* The whole word [a, a+sizeof(UWord)-1] must lie inside the
            accessible window.  The upper bound is tested by
            subtracting from max_accessible rather than adding to a:
            a + sizeof(UWord) - 1 wraps around for addresses near the
            top of the address space and would then wrongly pass the
            check. */
         if (a < eec->min_accessible
             || eec->max_accessible < sizeof(UWord) - 1
             || a > eec->max_accessible - (sizeof(UWord) - 1)) {
            *ok = False;
            return 0;
         }
         /* let's hope it doesn't trap! */
         return * ((UWord*)a);
      default:
         goto unhandled;
   }
   /*NOTREACHED*/
  unhandled:
   VG_(printf)("\n\nevalCfiExpr: unhandled\n");
   ML_(ppCfiExpr)( exprs, ix );
   VG_(printf)("\n");
   vg_assert(0);
   /*NOTREACHED*/
   return 0;
}


/* Search all the DebugInfos in the entire system, to find the DiCfSI
   that pertains to 'ip'.

   If found, set *diP to the DebugInfo in which it resides, and
   *ixP to the index in that DebugInfo's cfsi array.

   If not found, set *diP to (DebugInfo*)1 and *ixP to zero.
+*/ +__attribute__((noinline)) +static void find_DiCfSI ( /*OUT*/DebugInfo** diP, + /*OUT*/Word* ixP, + Addr ip ) +{ + DebugInfo* di; + Word i = -1; + + static UWord n_search = 0; + static UWord n_steps = 0; + n_search++; + + if (0) VG_(printf)("search for %#lx\n", ip); + + for (di = debugInfo_list; di != NULL; di = di->next) { + Word j; + n_steps++; + + /* Use the per-DebugInfo summary address ranges to skip + inapplicable DebugInfos quickly. */ + if (di->cfsi_used == 0) + continue; + if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma) + continue; + + /* It might be in this DebugInfo. Search it. */ + j = ML_(search_one_cfitab)( di, ip ); + vg_assert(j >= -1 && j < (Word)di->cfsi_used); + + if (j != -1) { + i = j; + break; /* found it */ + } + } + + if (i == -1) { + + /* we didn't find it. */ + *diP = (DebugInfo*)1; + *ixP = 0; + + } else { + + /* found it. */ + /* ensure that di is 4-aligned (at least), so it can't possibly + be equal to (DebugInfo*)1. */ + vg_assert(di && VG_IS_4_ALIGNED(di)); + vg_assert(i >= 0 && i < di->cfsi_used); + *diP = di; + *ixP = i; + + /* Start of performance-enhancing hack: once every 64 (chosen + hackily after profiling) successful searches, move the found + DebugInfo one step closer to the start of the list. This + makes future searches cheaper. For starting konqueror on + amd64, this in fact reduces the total amount of searching + done by the above find-the-right-DebugInfo loop by more than + a factor of 20. */ + if ((n_search & 0xF) == 0) { + /* Move di one step closer to the start of the list. */ + move_DebugInfo_one_step_forward( di ); + } + /* End of performance-enhancing hack. */ + + if (0 && ((n_search & 0x7FFFF) == 0)) + VG_(printf)("find_DiCfSI: %lu searches, " + "%lu DebugInfos looked at\n", + n_search, n_steps); + + } + +} + + +/* Now follows a mechanism for caching queries to find_DiCfSI, since + they are extremely frequent on amd64-linux, during stack unwinding. + + Each cache entry binds an ip value to a (di, ix) pair. 
Possible + values: + + di is non-null, ix >= 0 ==> cache slot in use, "di->cfsi[ix]" + di is (DebugInfo*)1 ==> cache slot in use, no associated di + di is NULL ==> cache slot not in use + + Hence simply zeroing out the entire cache invalidates all + entries. + + Why not map ip values directly to DiCfSI*'s? Because this would + cause problems if/when the cfsi array is moved due to resizing. + Instead we cache .cfsi array index value, which should be invariant + across resizing. (That said, I don't think the current + implementation will resize whilst during queries, since the DiCfSI + records are added all at once, when the debuginfo for an object is + read, and is not changed ever thereafter. */ + +#define N_CFSI_CACHE 511 + +typedef + struct { Addr ip; DebugInfo* di; Word ix; } + CFSICacheEnt; + +static CFSICacheEnt cfsi_cache[N_CFSI_CACHE]; + +static void cfsi_cache__invalidate ( void ) { + VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache)); +} + + +/* The main function for DWARF2/3 CFI-based stack unwinding. + Given an IP/SP/FP triple, produce the IP/SP/FP values for the + previous frame, if possible. */ +/* Returns True if OK. If not OK, *{ip,sp,fp}P are not changed. */ +/* NOTE: this function may rearrange the order of entries in the + DebugInfo list. */ +Bool VG_(use_CF_info) ( /*MOD*/Addr* ipP, + /*MOD*/Addr* spP, + /*MOD*/Addr* fpP, + Addr min_accessible, + Addr max_accessible ) +{ + Bool ok; + DebugInfo* di; + DiCfSI* cfsi = NULL; + Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev; + + CfiExprEvalContext eec; + + static UWord n_q = 0, n_m = 0; + n_q++; + if (0 && 0 == (n_q & 0x1FFFFF)) + VG_(printf)("QQQ %lu %lu\n", n_q, n_m); + + { UWord hash = (*ipP) % N_CFSI_CACHE; + CFSICacheEnt* ce = &cfsi_cache[hash]; + + if (LIKELY(ce->ip == *ipP) && LIKELY(ce->di != NULL)) { + /* found an entry in the cache .. */ + } else { + /* not found in cache. Search and update. 
*/ + n_m++; + ce->ip = *ipP; + find_DiCfSI( &ce->di, &ce->ix, *ipP ); + } + + if (UNLIKELY(ce->di == (DebugInfo*)1)) { + /* no DiCfSI for this address */ + cfsi = NULL; + di = NULL; + } else { + /* found a DiCfSI for this address */ + di = ce->di; + cfsi = &di->cfsi[ ce->ix ]; + } + } + + if (UNLIKELY(cfsi == NULL)) + return False; /* no info. Nothing we can do. */ + + if (0) { + VG_(printf)("found cfisi: "); + ML_(ppDiCfSI)(di->cfsi_exprs, cfsi); + } + + ipPrev = spPrev = fpPrev = 0; + + ipHere = *ipP; + spHere = *spP; + fpHere = *fpP; + + /* First compute the CFA. */ + cfa = 0; + switch (cfsi->cfa_how) { + case CFIC_SPREL: + cfa = cfsi->cfa_off + spHere; + break; + case CFIC_FPREL: + cfa = cfsi->cfa_off + fpHere; + break; + case CFIC_EXPR: + if (0) { + VG_(printf)("CFIC_EXPR: "); + ML_(ppCfiExpr)(di->cfsi_exprs, cfsi->cfa_off); + VG_(printf)("\n"); + } + eec.ipHere = ipHere; + eec.spHere = spHere; + eec.fpHere = fpHere; + eec.min_accessible = min_accessible; + eec.max_accessible = max_accessible; + ok = True; + cfa = evalCfiExpr(di->cfsi_exprs, cfsi->cfa_off, &eec, &ok ); + if (!ok) return False; + break; + default: + vg_assert(0); + } + + /* Now we know the CFA, use it to roll back the registers we're + interested in. 
*/ + +# define COMPUTE(_prev, _here, _how, _off) \ + do { \ + switch (_how) { \ + case CFIR_UNKNOWN: \ + return False; \ + case CFIR_SAME: \ + _prev = _here; break; \ + case CFIR_MEMCFAREL: { \ + Addr a = cfa + (Word)_off; \ + if (a < min_accessible \ + || a > max_accessible-sizeof(Addr)) \ + return False; \ + _prev = *(Addr*)a; \ + break; \ + } \ + case CFIR_CFAREL: \ + _prev = cfa + (Word)_off; \ + break; \ + case CFIR_EXPR: \ + if (0) \ + ML_(ppCfiExpr)(di->cfsi_exprs,_off); \ + eec.ipHere = ipHere; \ + eec.spHere = spHere; \ + eec.fpHere = fpHere; \ + eec.min_accessible = min_accessible; \ + eec.max_accessible = max_accessible; \ + ok = True; \ + _prev = evalCfiExpr(di->cfsi_exprs, _off, &eec, &ok ); \ + if (!ok) return False; \ + break; \ + default: \ + vg_assert(0); \ + } \ + } while (0) + + COMPUTE(ipPrev, ipHere, cfsi->ra_how, cfsi->ra_off); + COMPUTE(spPrev, spHere, cfsi->sp_how, cfsi->sp_off); + COMPUTE(fpPrev, fpHere, cfsi->fp_how, cfsi->fp_off); + +# undef COMPUTE + + *ipP = ipPrev; + *spP = spPrev; + *fpP = fpPrev; + return True; +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: FOR UNWINDING THE STACK USING ---*/ +/*--- MSVC FPO INFO ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +Bool VG_(use_FPO_info) ( /*MOD*/Addr* ipP, + /*MOD*/Addr* spP, + /*MOD*/Addr* fpP, + Addr min_accessible, + Addr max_accessible ) +{ + Word i; + DebugInfo* di; + FPO_DATA* fpo = NULL; + Addr spHere; + + static UWord n_search = 0; + static UWord n_steps = 0; + n_search++; + + if (0) VG_(printf)("search FPO for %#lx\n", *ipP); + + for (di = debugInfo_list; di != NULL; di = di->next) { + n_steps++; + + /* Use the per-DebugInfo summary address ranges to skip + inapplicable DebugInfos quickly. 
*/ + if (di->fpo == NULL) + continue; + if (*ipP < di->fpo_minavma || *ipP > di->fpo_maxavma) + continue; + + i = ML_(search_one_fpotab)( di, *ipP ); + if (i != -1) { + Word j; + if (0) { + /* debug printing only */ + VG_(printf)("look for %#lx size %ld i %ld\n", + *ipP, di->fpo_size, i); + for (j = 0; j < di->fpo_size; j++) + VG_(printf)("[%02ld] %#x %d\n", + j, di->fpo[j].ulOffStart, di->fpo[j].cbProcSize); + } + vg_assert(i >= 0 && i < di->fpo_size); + fpo = &di->fpo[i]; + break; + } + } + + if (fpo == NULL) + return False; + + if (0 && ((n_search & 0x7FFFF) == 0)) + VG_(printf)("VG_(use_FPO_info): %lu searches, " + "%lu DebugInfos looked at\n", + n_search, n_steps); + + + /* Start of performance-enhancing hack: once every 64 (chosen + hackily after profiling) successful searches, move the found + DebugInfo one step closer to the start of the list. This makes + future searches cheaper. For starting konqueror on amd64, this + in fact reduces the total amount of searching done by the above + find-the-right-DebugInfo loop by more than a factor of 20. */ + if ((n_search & 0x3F) == 0) { + /* Move si one step closer to the start of the list. */ + //move_DebugInfo_one_step_forward( di ); + } + /* End of performance-enhancing hack. */ + + if (0) { + VG_(printf)("found fpo: "); + //ML_(ppFPO)(fpo); + } + + /* + Stack layout is: + %esp-> + 4*.cbRegs {%edi, %esi, %ebp, %ebx} + 4*.cdwLocals + return_pc + 4*.cdwParams + prior_%esp-> + + Typical code looks like: + sub $4*.cdwLocals,%esp + Alternative to above for >=4KB (and sometimes for smaller): + mov $size,%eax + call __chkstk # WinNT performs page-by-page probe! + __chkstk is much like alloc(), except that on return + %eax= 5+ &CALL. Thus it could be used as part of + Position Independent Code to locate the Global Offset Table. + push %ebx + push %ebp + push %esi + Other once-only instructions often scheduled >here<. 
+ push %edi + + If the pc is within the first .cbProlog bytes of the function, + then you must disassemble to see how many registers have been pushed, + because instructions in the prolog may be scheduled for performance. + The order of PUSH is always %ebx, %ebp, %esi, %edi, with trailing + registers not pushed when .cbRegs < 4. This seems somewhat strange + because %ebp is the register whose usage you want to minimize, + yet it is in the first half of the PUSH list. + + I don't know what happens when the compiler constructs an outgoing CALL. + %esp could move if outgoing parameters are PUSHed, and this affects + traceback for errors during the PUSHes. */ + + spHere = *spP; + + *ipP = *(Addr *)(spHere + 4*(fpo->cbRegs + fpo->cdwLocals)); + *spP = spHere + 4*(fpo->cbRegs + fpo->cdwLocals + 1 + fpo->cdwParams); + *fpP = *(Addr *)(spHere + 4*2); + return True; +} + + +/*--------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL: GENERATE DESCRIPTION OF DATA ADDRESSES ---*/ +/*--- FROM DWARF3 DEBUG INFO ---*/ +/*--- ---*/ +/*--------------------------------------------------------------*/ + +/* Evaluate the location expression/list for var, to see whether or + not data_addr falls within the variable. If so also return the + offset of data_addr from the start of the variable. Note that + regs, which supplies ip,sp,fp values, will be NULL for global + variables, and non-NULL for local variables. */ +static Bool data_address_is_in_var ( /*OUT*/PtrdiffT* offset, + XArray* /* TyEnt */ tyents, + DiVariable* var, + RegSummary* regs, + Addr data_addr, + const DebugInfo* di ) +{ + MaybeULong mul; + SizeT var_szB; + GXResult res; + Bool show = False; + + vg_assert(var->name); + vg_assert(var->gexpr); + + /* Figure out how big the variable is. */ + mul = ML_(sizeOfType)(tyents, var->typeR); + /* If this var has a type whose size is unknown, zero, or + impossibly large, it should never have been added. ML_(addVar) + should have rejected it. 
*/ + vg_assert(mul.b == True); + vg_assert(mul.ul > 0); + if (sizeof(void*) == 4) vg_assert(mul.ul < (1ULL << 32)); + /* After this point, we assume we can truncate mul.ul to a host word + safely (without loss of info). */ + + var_szB = (SizeT)mul.ul; /* NB: truncate to host word */ + + if (show) { + VG_(printf)("VVVV: data_address_%#lx_is_in_var: %s :: ", + data_addr, var->name ); + ML_(pp_TyEnt_C_ishly)( tyents, var->typeR ); + VG_(printf)("\n"); + } + + /* ignore zero-sized vars; they can never match anything. */ + if (var_szB == 0) { + if (show) + VG_(printf)("VVVV: -> Fail (variable is zero sized)\n"); + return False; + } + + res = ML_(evaluate_GX)( var->gexpr, var->fbGX, regs, di ); + + if (show) { + VG_(printf)("VVVV: -> "); + ML_(pp_GXResult)( res ); + VG_(printf)("\n"); + } + + if (res.kind == GXR_Value + && res.word <= data_addr + && data_addr < res.word + var_szB) { + *offset = data_addr - res.word; + return True; + } else { + return False; + } +} + + +/* Format the acquired information into dname1[0 .. n_dname-1] and + dname2[0 .. n_dname-1] in an understandable way. Not so easy. + If frameNo is -1, this is assumed to be a global variable; else + a local variable. */ +static void format_message ( /*OUT*/Char* dname1, + /*OUT*/Char* dname2, + Int n_dname, + Addr data_addr, + DiVariable* var, + PtrdiffT var_offset, + PtrdiffT residual_offset, + XArray* /*UChar*/ described, + Int frameNo, + ThreadId tid ) +{ + Bool have_descr, have_srcloc; + UChar* vo_plural = var_offset == 1 ? "" : "s"; + UChar* ro_plural = residual_offset == 1 ? 
"" : "s"; + + vg_assert(frameNo >= -1); + vg_assert(dname1 && dname2 && n_dname > 1); + vg_assert(described); + vg_assert(var && var->name); + have_descr = VG_(sizeXA)(described) > 0 + && *(UChar*)VG_(indexXA)(described,0) != '\0'; + have_srcloc = var->fileName && var->lineNo > 0; + + dname1[0] = dname2[0] = '\0'; + + /* ------ local cases ------ */ + + if ( frameNo >= 0 && (!have_srcloc) && (!have_descr) ) { + /* no srcloc, no description: + Location 0x7fefff6cf is 543 bytes inside local var "a", + in frame #1 of thread 1 + */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside local var \"%s\",", + data_addr, var_offset, vo_plural, var->name ); + VG_(snprintf)( + dname2, n_dname, + "in frame #%d of thread %d", frameNo, (Int)tid); + } + else + if ( frameNo >= 0 && have_srcloc && (!have_descr) ) { + /* no description: + Location 0x7fefff6cf is 543 bytes inside local var "a" + declared at dsyms7.c:17, in frame #1 of thread 1 + */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside local var \"%s\"", + data_addr, var_offset, vo_plural, var->name ); + VG_(snprintf)( + dname2, n_dname, + "declared at %s:%d, in frame #%d of thread %d", + var->fileName, var->lineNo, frameNo, (Int)tid); + } + else + if ( frameNo >= 0 && (!have_srcloc) && have_descr ) { + /* no srcloc: + Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2 + in frame #1 of thread 1 + */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside %s%s", + data_addr, residual_offset, ro_plural, var->name, + (char*)(VG_(indexXA)(described,0)) ); + VG_(snprintf)( + dname2, n_dname, + "in frame #%d of thread %d", frameNo, (Int)tid); + } + else + if ( frameNo >= 0 && have_srcloc && have_descr ) { + /* Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2, + declared at dsyms7.c:17, in frame #1 of thread 1 */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside %s%s,", + data_addr, residual_offset, ro_plural, var->name, + 
(char*)(VG_(indexXA)(described,0)) ); + VG_(snprintf)( + dname2, n_dname, + "declared at %s:%d, in frame #%d of thread %d", + var->fileName, var->lineNo, frameNo, (Int)tid); + } + else + /* ------ global cases ------ */ + if ( frameNo >= -1 && (!have_srcloc) && (!have_descr) ) { + /* no srcloc, no description: + Location 0x7fefff6cf is 543 bytes inside global var "a" + */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside global var \"%s\"", + data_addr, var_offset, vo_plural, var->name ); + } + else + if ( frameNo >= -1 && have_srcloc && (!have_descr) ) { + /* no description: + Location 0x7fefff6cf is 543 bytes inside global var "a" + declared at dsyms7.c:17 + */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside global var \"%s\"", + data_addr, var_offset, vo_plural, var->name ); + VG_(snprintf)( + dname2, n_dname, + "declared at %s:%d", + var->fileName, var->lineNo); + } + else + if ( frameNo >= -1 && (!have_srcloc) && have_descr ) { + /* no srcloc: + Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2, + a global variable + */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside %s%s,", + data_addr, residual_offset, ro_plural, var->name, + (char*)(VG_(indexXA)(described,0)) ); + VG_(snprintf)( + dname2, n_dname, + "a global variable"); + } + else + if ( frameNo >= -1 && have_srcloc && have_descr ) { + /* Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2, + a global variable declared at dsyms7.c:17 */ + VG_(snprintf)( + dname1, n_dname, + "Location 0x%lx is %lu byte%s inside %s%s,", + data_addr, residual_offset, ro_plural, var->name, + (char*)(VG_(indexXA)(described,0)) ); + VG_(snprintf)( + dname2, n_dname, + "a global variable declared at %s:%d", + var->fileName, var->lineNo); + } + else + vg_assert(0); + + dname1[n_dname-1] = dname2[n_dname-1] = 0; +} + +/* Determine if data_addr is a local variable in the frame + characterised by (ip,sp,fp), and if so write its description into 
+ dname{1,2}[0..n_dname-1], and return True. If not, return + False. */ +static +Bool consider_vars_in_frame ( /*OUT*/Char* dname1, + /*OUT*/Char* dname2, + Int n_dname, + Addr data_addr, + Addr ip, Addr sp, Addr fp, + /* shown to user: */ + ThreadId tid, Int frameNo ) +{ + Word i; + DebugInfo* di; + RegSummary regs; + Bool debug = False; + + static UInt n_search = 0; + static UInt n_steps = 0; + n_search++; + if (debug) + VG_(printf)("QQQQ: cvif: ip,sp,fp %#lx,%#lx,%#lx\n", ip,sp,fp); + /* first, find the DebugInfo that pertains to 'ip'. */ + for (di = debugInfo_list; di; di = di->next) { + n_steps++; + /* text segment missing? unlikely, but handle it .. */ + if (!di->text_present || di->text_size == 0) + continue; + /* Ok. So does this text mapping bracket the ip? */ + if (di->text_avma <= ip && ip < di->text_avma + di->text_size) + break; + } + + /* Didn't find it. Strange -- means ip is a code address outside + of any mapped text segment. Unlikely but not impossible -- app + could be generating code to run. */ + if (!di) + return False; + + if (0 && ((n_search & 0x1) == 0)) + VG_(printf)("consider_vars_in_frame: %u searches, " + "%u DebugInfos looked at\n", + n_search, n_steps); + /* Start of performance-enhancing hack: once every ??? (chosen + hackily after profiling) successful searches, move the found + DebugInfo one step closer to the start of the list. This makes + future searches cheaper. */ + if ((n_search & 0xFFFF) == 0) { + /* Move si one step closer to the start of the list. */ + move_DebugInfo_one_step_forward( di ); + } + /* End of performance-enhancing hack. */ + + /* any var info at all? */ + if (!di->varinfo) + return False; + + /* Work through the scopes from most deeply nested outwards, + looking for code address ranges that bracket 'ip'. The + variables on each such address range found are in scope right + now. Don't descend to level zero as that is the global + scope. 
*/ + regs.ip = ip; + regs.sp = sp; + regs.fp = fp; + + /* "for each scope, working outwards ..." */ + for (i = VG_(sizeXA)(di->varinfo) - 1; i >= 1; i--) { + XArray* vars; + Word j; + DiAddrRange* arange; + OSet* this_scope + = *(OSet**)VG_(indexXA)( di->varinfo, i ); + if (debug) + VG_(printf)("QQQQ: considering scope %ld\n", (Word)i); + if (!this_scope) + continue; + /* Find the set of variables in this scope that + bracket the program counter. */ + arange = VG_(OSetGen_LookupWithCmp)( + this_scope, &ip, + ML_(cmp_for_DiAddrRange_range) + ); + if (!arange) + continue; + /* stay sane */ + vg_assert(arange->aMin <= arange->aMax); + /* It must bracket the ip we asked for, else + ML_(cmp_for_DiAddrRange_range) is somehow broken. */ + vg_assert(arange->aMin <= ip && ip <= arange->aMax); + /* It must have an attached XArray of DiVariables. */ + vars = arange->vars; + vg_assert(vars); + /* But it mustn't cover the entire address range. We only + expect that to happen for the global scope (level 0), which + we're not looking at here. Except, it may cover the entire + address range, but in that case the vars array must be + empty. */ + vg_assert(! (arange->aMin == (Addr)0 + && arange->aMax == ~(Addr)0 + && VG_(sizeXA)(vars) > 0) ); + for (j = 0; j < VG_(sizeXA)( vars ); j++) { + DiVariable* var = (DiVariable*)VG_(indexXA)( vars, j ); + PtrdiffT offset; + if (debug) + VG_(printf)("QQQQ: var:name=%s %#lx-%#lx %#lx\n", + var->name,arange->aMin,arange->aMax,ip); + if (data_address_is_in_var( &offset, di->admin_tyents, + var, ®s, + data_addr, di )) { + PtrdiffT residual_offset = 0; + XArray* described = ML_(describe_type)( &residual_offset, + di->admin_tyents, + var->typeR, offset ); + format_message( dname1, dname2, n_dname, + data_addr, var, offset, residual_offset, + described, frameNo, tid ); + VG_(deleteXA)( described ); + return True; + } + } + } + + return False; +} + +/* Try to form some description of data_addr by looking at the DWARF3 + debug info we have. 
This considers all global variables, and all + frames in the stacks of all threads. Result (or as much as will + fit) is put into dname{1,2}[0 .. n_dname-1] and is guaranteed + to be zero terminated. */ +Bool VG_(get_data_description)( /*OUT*/Char* dname1, + /*OUT*/Char* dname2, + Int n_dname, + Addr data_addr ) +{ +# define N_FRAMES 8 + Addr ips[N_FRAMES], sps[N_FRAMES], fps[N_FRAMES]; + UInt n_frames; + + Addr stack_min, stack_max; + ThreadId tid = 0; /* defined value for the global-variable path + below, where format_message is called before + any thread iteration has set tid (tid is not + printed when frameNo is -1) */ + Bool found; + DebugInfo* di; + Word j; + + vg_assert(n_dname > 1); + dname1[n_dname-1] = dname2[n_dname-1] = 0; + + if (0) VG_(printf)("get_data_description: dataaddr %#lx\n", data_addr); + /* First, see if data_addr is (or is part of) a global variable. + Loop over the DebugInfos we have. Check data_addr against the + outermost scope of all of them, as that should be a global + scope. */ + for (di = debugInfo_list; di != NULL; di = di->next) { + OSet* global_scope; + Word gs_size; + Addr zero; + DiAddrRange* global_arange; + Word i; + XArray* vars; + + /* text segment missing? unlikely, but handle it .. */ + if (!di->text_present || di->text_size == 0) + continue; + /* any var info at all? */ + if (!di->varinfo) + continue; + /* perhaps this object didn't contribute any vars at all? */ + if (VG_(sizeXA)( di->varinfo ) == 0) + continue; + global_scope = *(OSet**)VG_(indexXA)( di->varinfo, 0 ); + vg_assert(global_scope); + gs_size = VG_(OSetGen_Size)( global_scope ); + /* The global scope might be completely empty if this + compilation unit declared locals but nothing global. */ + if (gs_size == 0) + continue; + /* But if it isn't empty, then it must contain exactly one + element, which covers the entire address range. */ + vg_assert(gs_size == 1); + /* Fish out the global scope and check it is as expected. 
*/ + zero = 0; + global_arange + = VG_(OSetGen_Lookup)( global_scope, &zero ); + /* The global range from (Addr)0 to ~(Addr)0 must exist */ + vg_assert(global_arange); + vg_assert(global_arange->aMin == (Addr)0 + && global_arange->aMax == ~(Addr)0); + /* Any vars in this range? */ + if (!global_arange->vars) + continue; + /* Ok, there are some vars in the global scope of this + DebugInfo. Wade through them and see if the data addresses + of any of them bracket data_addr. */ + vars = global_arange->vars; + for (i = 0; i < VG_(sizeXA)( vars ); i++) { + PtrdiffT offset; + DiVariable* var = (DiVariable*)VG_(indexXA)( vars, i ); + vg_assert(var->name); + /* Note we use a NULL RegSummary* here. It can't make any + sense for a global variable to have a location expression + which depends on a SP/FP/IP value. So don't supply any. + This means, if the evaluation of the location + expression/list requires a register, we have to let it + fail. */ + if (data_address_is_in_var( &offset, di->admin_tyents, var, + NULL/* RegSummary* */, + data_addr, di )) { + PtrdiffT residual_offset = 0; + XArray* described = ML_(describe_type)( &residual_offset, + di->admin_tyents, + var->typeR, offset ); + format_message( dname1, dname2, n_dname, + data_addr, var, offset, residual_offset, + described, -1/*frameNo*/, tid ); + VG_(deleteXA)( described ); + dname1[n_dname-1] = dname2[n_dname-1] = 0; + return True; + } + } + } + + /* Ok, well it's not a global variable. So now let's snoop around + in the stacks of all the threads. First try to figure out which + thread's stack data_addr is in. */ + + /* --- KLUDGE --- Try examining the top frame of all thread stacks. + This finds variables which are not stack allocated but are not + globally visible either; specifically it appears to pick up + variables which are visible only within a compilation unit. + These will have the address range of the compilation unit and + tend to live at Scope level 1. 
*/ + VG_(thread_stack_reset_iter)(&tid); + while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) { + if (stack_min >= stack_max) + continue; /* ignore obviously stupid cases */ + if (consider_vars_in_frame( dname1, dname2, n_dname, + data_addr, + VG_(get_IP)(tid), + VG_(get_SP)(tid), + VG_(get_FP)(tid), tid, 0 )) { + dname1[n_dname-1] = dname2[n_dname-1] = 0; + return True; + } + } + /* --- end KLUDGE --- */ + + /* Perhaps it's on a thread's stack? */ + found = False; + VG_(thread_stack_reset_iter)(&tid); + while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) { + if (stack_min >= stack_max) + continue; /* ignore obviously stupid cases */ + if (stack_min - VG_STACK_REDZONE_SZB <= data_addr + && data_addr <= stack_max) { + found = True; + break; + } + } + if (!found) { + dname1[n_dname-1] = dname2[n_dname-1] = 0; + return False; + } + + /* We conclude data_addr is in thread tid's stack. Unwind the + stack to get a bunch of (ip,sp,fp) triples describing the + frames, and for each frame, consider the local variables. */ + n_frames = VG_(get_StackTrace)( tid, ips, N_FRAMES, + sps, fps, 0/*first_ip_delta*/ ); + + /* As a result of KLUDGE above, starting the loop at j = 0 + duplicates examination of the top frame and so isn't necessary. + Oh well. */ + /* n_frames is unsigned, so only the upper bound needs checking. */ + vg_assert(n_frames <= N_FRAMES); + for (j = 0; j < n_frames; j++) { + if (consider_vars_in_frame( dname1, dname2, n_dname, + data_addr, + ips[j], + sps[j], fps[j], tid, j )) { + dname1[n_dname-1] = dname2[n_dname-1] = 0; + return True; + } + /* Now, it appears that gcc sometimes appears to produce + location lists whose ranges don't actually cover the call + instruction, even though the address of the variable in + question is passed as a parameter in the call. AFAICS this + is simply a bug in gcc - how can the variable be claimed not + to exist in memory (on the stack) for the duration of a call + in which its address is passed? 
But anyway, in the particular + case I investigated (memcheck/tests/varinfo6.c, call to croak + on line 2999, local var budget declared at line 3115 + appearing not to exist across the call to mainSort on line + 3143, "gcc.orig (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2)" on + amd64), the variable's location list does claim it exists + starting at the first byte of the first instruction after the + call instruction. So, call consider_vars_in_frame a second + time, but this time add 1 to the IP. GDB handles this + example with no difficulty, which leads me to believe that + either (1) I misunderstood something, or (2) GDB has an + equivalent kludge. */ + if (j > 0 /* this is a non-innermost frame */ + && consider_vars_in_frame( dname1, dname2, n_dname, + data_addr, + ips[j] + 1, + sps[j], fps[j], tid, j )) { + dname1[n_dname-1] = dname2[n_dname-1] = 0; + return True; + } + } + + /* We didn't find anything useful. */ + dname1[n_dname-1] = dname2[n_dname-1] = 0; + return False; +# undef N_FRAMES +} + + +////////////////////////////////////////////////////////////////// +// // +// Support for other kinds of queries to the Dwarf3 var info // +// // +////////////////////////////////////////////////////////////////// + +/* Figure out if the variable 'var' has a location that is linearly + dependent on a stack pointer value, or a frame pointer value, and + if it is, add a description of it to 'blocks'. Otherwise ignore + it. If 'arrays_only' is True, also ignore it unless it has an + array type. */ + +static +void analyse_deps ( /*MOD*/XArray* /* of StackBlock */ blocks, + XArray* /* TyEnt */ tyents, + Addr ip, const DebugInfo* di, DiVariable* var, + Bool arrays_only ) +{ + GXResult res_sp_6k, res_sp_7k, res_fp_6k, res_fp_7k; + RegSummary regs; + MaybeULong mul; + Bool isVec; + TyEnt* ty; + + Bool debug = False; + if (0&&debug) + VG_(printf)("adeps: var %s\n", var->name ); + + /* Figure out how big the variable is. 
*/ + mul = ML_(sizeOfType)(tyents, var->typeR); + /* If this var has a type whose size is unknown, zero, or + impossibly large, it should never have been added. ML_(addVar) + should have rejected it. */ + vg_assert(mul.b == True); + vg_assert(mul.ul > 0); + if (sizeof(void*) == 4) vg_assert(mul.ul < (1ULL << 32)); + /* After this point, we assume we can truncate mul.ul to a host word + safely (without loss of info). */ + + /* skip if non-array and we're only interested in arrays */ + ty = ML_(TyEnts__index_by_cuOff)( tyents, NULL, var->typeR ); + vg_assert(ty); + vg_assert(ty->tag == Te_UNKNOWN || ML_(TyEnt__is_type)(ty)); + if (ty->tag == Te_UNKNOWN) + return; /* perhaps we should complain in this case? */ + isVec = ty->tag == Te_TyArray; + if (arrays_only && !isVec) + return; + + if (0) {ML_(pp_TyEnt_C_ishly)(tyents, var->typeR); + VG_(printf)(" %s\n", var->name);} + + /* Do some test evaluations of the variable's location expression, + in order to guess whether it is sp-relative, fp-relative, or + none. A crude hack, which can be interpreted roughly as finding + the first derivative of the location expression w.r.t. the + supplied frame and stack pointer values. 
*/ + regs.fp = 0; + regs.ip = ip; + regs.sp = 6 * 1024; + res_sp_6k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di ); + + regs.fp = 0; + regs.ip = ip; + regs.sp = 7 * 1024; + res_sp_7k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di ); + + regs.fp = 6 * 1024; + regs.ip = ip; + regs.sp = 0; + res_fp_6k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di ); + + regs.fp = 7 * 1024; + regs.ip = ip; + regs.sp = 0; + res_fp_7k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di ); + + vg_assert(res_sp_6k.kind == res_sp_7k.kind); + vg_assert(res_sp_6k.kind == res_fp_6k.kind); + vg_assert(res_sp_6k.kind == res_fp_7k.kind); + + if (res_sp_6k.kind == GXR_Value) { + StackBlock block; + GXResult res; + /* Each delta is the change in the computed address when the + corresponding register is moved by 1024: 1024 means the + address tracks that register, 0 means it is independent + of it. */ + UWord sp_delta = res_sp_7k.word - res_sp_6k.word; + UWord fp_delta = res_fp_7k.word - res_fp_6k.word; + tl_assert(sp_delta == 0 || sp_delta == 1024); + tl_assert(fp_delta == 0 || fp_delta == 1024); + + if (sp_delta == 0 && fp_delta == 0) { + /* depends neither on sp nor fp, so it can't be a stack + local. Ignore it. */ + } + else + if (sp_delta == 1024 && fp_delta == 0) { + /* sp-relative: evaluate with sp = fp = 0 so that res.word + is the offset recorded in block.base. */ + regs.sp = regs.fp = 0; + regs.ip = ip; + res = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di ); + tl_assert(res.kind == GXR_Value); + if (debug) + VG_(printf)(" %5ld .. %5ld (sp) %s\n", + res.word, res.word + ((UWord)mul.ul) - 1, var->name); + block.base = res.word; + block.szB = (SizeT)mul.ul; + block.spRel = True; + block.isVec = isVec; + VG_(memset)( &block.name[0], 0, sizeof(block.name) ); + if (var->name) + VG_(strncpy)( &block.name[0], var->name, sizeof(block.name)-1 ); + block.name[ sizeof(block.name)-1 ] = 0; + VG_(addToXA)( blocks, &block ); + } + else + if (sp_delta == 0 && fp_delta == 1024) { + /* fp-relative: same procedure as the sp-relative case. */ + regs.sp = regs.fp = 0; + regs.ip = ip; + res = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di ); + tl_assert(res.kind == GXR_Value); + if (debug) + VG_(printf)(" %5ld .. 
%5ld (FP) %s\n", + res.word, res.word + ((UWord)mul.ul) - 1, var->name); + block.base = res.word; + block.szB = (SizeT)mul.ul; + block.spRel = False; + block.isVec = isVec; + VG_(memset)( &block.name[0], 0, sizeof(block.name) ); + if (var->name) + VG_(strncpy)( &block.name[0], var->name, sizeof(block.name)-1 ); + block.name[ sizeof(block.name)-1 ] = 0; + VG_(addToXA)( blocks, &block ); + } + else { + vg_assert(0); + } + } +} + + +/* Get an XArray of StackBlock which describe the stack (auto) blocks + for this ip. The caller is expected to free the XArray at some + point. If 'arrays_only' is True, only array-typed blocks are + returned; otherwise blocks of all types are returned. */ + +void* /* really, XArray* of StackBlock */ + VG_(di_get_stack_blocks_at_ip)( Addr ip, Bool arrays_only ) +{ + /* This is a derivation of consider_vars_in_frame() above. */ + Word i; + DebugInfo* di; + RegSummary regs; + Bool debug = False; + + XArray* res = VG_(newXA)( ML_(dinfo_zalloc), "di.debuginfo.dgsbai.1", + ML_(dinfo_free), + sizeof(StackBlock) ); + + static UInt n_search = 0; + static UInt n_steps = 0; + n_search++; + if (debug) + VG_(printf)("QQQQ: dgsbai: ip %#lx\n", ip); + /* first, find the DebugInfo that pertains to 'ip'. */ + for (di = debugInfo_list; di; di = di->next) { + n_steps++; + /* text segment missing? unlikely, but handle it .. */ + if (!di->text_present || di->text_size == 0) + continue; + /* Ok. So does this text mapping bracket the ip? */ + if (di->text_avma <= ip && ip < di->text_avma + di->text_size) + break; + } + + /* Didn't find it. Strange -- means ip is a code address outside + of any mapped text segment. Unlikely but not impossible -- app + could be generating code to run. */ + if (!di) + return res; /* currently empty */ + + if (0 && ((n_search & 0x1) == 0)) + VG_(printf)("VG_(di_get_stack_blocks_at_ip): %u searches, " + "%u DebugInfos looked at\n", + n_search, n_steps); + /* Start of performance-enhancing hack: once every ??? 
(chosen + hackily after profiling) successful searches, move the found + DebugInfo one step closer to the start of the list. This makes + future searches cheaper. */ + if ((n_search & 0xFFFF) == 0) { + /* Move si one step closer to the start of the list. */ + move_DebugInfo_one_step_forward( di ); + } + /* End of performance-enhancing hack. */ + + /* any var info at all? */ + if (!di->varinfo) + return res; /* currently empty */ + + /* Work through the scopes from most deeply nested outwards, + looking for code address ranges that bracket 'ip'. The + variables on each such address range found are in scope right + now. Don't descend to level zero as that is the global + scope. */ + regs.ip = ip; + regs.sp = 0; + regs.fp = 0; + + /* "for each scope, working outwards ..." */ + for (i = VG_(sizeXA)(di->varinfo) - 1; i >= 1; i--) { + XArray* vars; + Word j; + DiAddrRange* arange; + OSet* this_scope + = *(OSet**)VG_(indexXA)( di->varinfo, i ); + if (debug) + VG_(printf)("QQQQ: considering scope %ld\n", (Word)i); + if (!this_scope) + continue; + /* Find the set of variables in this scope that + bracket the program counter. */ + arange = VG_(OSetGen_LookupWithCmp)( + this_scope, &ip, + ML_(cmp_for_DiAddrRange_range) + ); + if (!arange) + continue; + /* stay sane */ + vg_assert(arange->aMin <= arange->aMax); + /* It must bracket the ip we asked for, else + ML_(cmp_for_DiAddrRange_range) is somehow broken. */ + vg_assert(arange->aMin <= ip && ip <= arange->aMax); + /* It must have an attached XArray of DiVariables. */ + vars = arange->vars; + vg_assert(vars); + /* But it mustn't cover the entire address range. We only + expect that to happen for the global scope (level 0), which + we're not looking at here. Except, it may cover the entire + address range, but in that case the vars array must be + empty. */ + vg_assert(! 
(arange->aMin == (Addr)0 + && arange->aMax == ~(Addr)0 + && VG_(sizeXA)(vars) > 0) ); + for (j = 0; j < VG_(sizeXA)( vars ); j++) { + DiVariable* var = (DiVariable*)VG_(indexXA)( vars, j ); + if (debug) + VG_(printf)("QQQQ: var:name=%s %#lx-%#lx %#lx\n", + var->name,arange->aMin,arange->aMax,ip); + analyse_deps( res, di->admin_tyents, ip, + di, var, arrays_only ); + } + } + + return res; +} + + +/* Get an array of GlobalBlock which describe the global blocks owned + by the shared object characterised by the given di_handle. Asserts + if the handle is invalid. The caller is responsible for freeing + the array at some point. If 'arrays_only' is True, only + array-typed blocks are returned; otherwise blocks of all types are + returned. */ + +void* /* really, XArray* of GlobalBlock */ + VG_(di_get_global_blocks_from_dihandle) ( ULong di_handle, + Bool arrays_only ) +{ + /* This is a derivation of consider_vars_in_frame() above. */ + + DebugInfo* di; + XArray* gvars; /* XArray* of GlobalBlock */ + Word nScopes, scopeIx; + + /* The first thing to do is find the DebugInfo that + pertains to 'di_handle'. */ + tl_assert(di_handle > 0); + for (di = debugInfo_list; di; di = di->next) { + if (di->handle == di_handle) + break; + } + + /* If this fails, we were unable to find any DebugInfo with the + given handle. This is considered an error on the part of the + caller. */ + tl_assert(di != NULL); + + /* we'll put the collected variables in here. */ + gvars = VG_(newXA)( ML_(dinfo_zalloc), "di.debuginfo.dggbfd.1", + ML_(dinfo_free), sizeof(GlobalBlock) ); + tl_assert(gvars); + + /* any var info at all? 
*/ + if (!di->varinfo) + return gvars; + + /* we'll iterate over all the variables we can find, even if + it seems senseless to visit stack-allocated variables */ + /* Iterate over all scopes */ + nScopes = VG_(sizeXA)( di->varinfo ); + for (scopeIx = 0; scopeIx < nScopes; scopeIx++) { + + /* Iterate over each (code) address range at the current scope */ + DiAddrRange* range; + OSet* /* of DiAddrInfo */ scope + = *(OSet**)VG_(indexXA)( di->varinfo, scopeIx ); + tl_assert(scope); + VG_(OSetGen_ResetIter)(scope); + while ( (range = VG_(OSetGen_Next)(scope)) ) { + + /* Iterate over each variable in the current address range */ + Word nVars, varIx; + tl_assert(range->vars); + nVars = VG_(sizeXA)( range->vars ); + for (varIx = 0; varIx < nVars; varIx++) { + + Bool isVec; + GXResult res; + MaybeULong mul; + GlobalBlock gb; + TyEnt* ty; + DiVariable* var = VG_(indexXA)( range->vars, varIx ); + tl_assert(var->name); + if (0) VG_(printf)("at depth %ld var %s ", scopeIx, var->name ); + + /* Now figure out if this variable has a constant address + (that is, independent of FP, SP, phase of moon, etc), + and if so, what the address is. Any variable with a + constant address is deemed to be a global so we collect + it. */ + if (0) { VG_(printf)("EVAL: "); ML_(pp_GX)(var->gexpr); + VG_(printf)("\n"); } + res = ML_(evaluate_trivial_GX)( var->gexpr, di ); + + /* Not a constant address => not interesting */ + if (res.kind != GXR_Value) { + if (0) VG_(printf)("FAIL\n"); + continue; + } + + /* Ok, it's a constant address. See if we want to collect + it. */ + if (0) VG_(printf)("%#lx\n", res.word); + + /* Figure out how big the variable is. */ + mul = ML_(sizeOfType)(di->admin_tyents, var->typeR); + + /* If this var has a type whose size is unknown, zero, or + impossibly large, it should never have been added. + ML_(addVar) should have rejected it. 
*/ + vg_assert(mul.b == True); + vg_assert(mul.ul > 0); + if (sizeof(void*) == 4) vg_assert(mul.ul < (1ULL << 32)); + /* After this point, we assume we can truncate mul.ul to a + host word safely (without loss of info). */ + + /* skip if non-array and we're only interested in + arrays */ + ty = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, + var->typeR ); + vg_assert(ty); + vg_assert(ty->tag == Te_UNKNOWN || ML_(TyEnt__is_type)(ty)); + if (ty->tag == Te_UNKNOWN) + continue; /* perhaps we should complain in this case? */ + + isVec = ty->tag == Te_TyArray; + if (arrays_only && !isVec) continue; + + /* Ok, so collect it! */ + tl_assert(var->name); + tl_assert(di->soname); + if (0) VG_(printf)("XXXX %s %s %d\n", var->name, + var->fileName?(HChar*)var->fileName + :"??",var->lineNo); + VG_(memset)(&gb, 0, sizeof(gb)); + gb.addr = res.word; + gb.szB = (SizeT)mul.ul; + gb.isVec = isVec; + VG_(strncpy)(&gb.name[0], var->name, sizeof(gb.name)-1); + VG_(strncpy)(&gb.soname[0], di->soname, sizeof(gb.soname)-1); + tl_assert(gb.name[ sizeof(gb.name)-1 ] == 0); + tl_assert(gb.soname[ sizeof(gb.soname)-1 ] == 0); + + VG_(addToXA)( gvars, &gb ); + + } /* for (varIx = 0; varIx < nVars; varIx++) */ + + } /* while ( (range = VG_(OSetGen_Next)(scope)) ) */ + + } /* for (scopeIx = 0; scopeIx < nScopes; scopeIx++) */ + + return gvars; +} + + +/*------------------------------------------------------------*/ +/*--- DebugInfo accessor functions ---*/ +/*------------------------------------------------------------*/ + +const DebugInfo* VG_(next_seginfo)(const DebugInfo* di) +{ + if (di == NULL) + return debugInfo_list; + return di->next; +} + +Addr VG_(seginfo_get_text_avma)(const DebugInfo* di) +{ + return di->text_present ? di->text_avma : 0; +} + +SizeT VG_(seginfo_get_text_size)(const DebugInfo* di) +{ + return di->text_present ? di->text_size : 0; +} + +Addr VG_(seginfo_get_plt_avma)(const DebugInfo* di) +{ + return di->plt_present ? 
di->plt_avma : 0; +} + +SizeT VG_(seginfo_get_plt_size)(const DebugInfo* di) +{ + return di->plt_present ? di->plt_size : 0; +} + +Addr VG_(seginfo_get_gotplt_avma)(const DebugInfo* di) +{ + return di->gotplt_present ? di->gotplt_avma : 0; +} + +SizeT VG_(seginfo_get_gotplt_size)(const DebugInfo* di) +{ + return di->gotplt_present ? di->gotplt_size : 0; +} + +const UChar* VG_(seginfo_soname)(const DebugInfo* di) +{ + return di->soname; +} + +const UChar* VG_(seginfo_filename)(const DebugInfo* di) +{ + return di->filename; +} + +PtrdiffT VG_(seginfo_get_text_bias)(const DebugInfo* di) +{ + return di->text_present ? di->text_bias : 0; +} + +Int VG_(seginfo_syms_howmany) ( const DebugInfo *si ) +{ + return si->symtab_used; +} + +void VG_(seginfo_syms_getidx) ( const DebugInfo *si, + Int idx, + /*OUT*/Addr* avma, + /*OUT*/Addr* tocptr, + /*OUT*/UInt* size, + /*OUT*/HChar** name, + /*OUT*/Bool* isText ) +{ + vg_assert(idx >= 0 && idx < si->symtab_used); + if (avma) *avma = si->symtab[idx].addr; + if (tocptr) *tocptr = si->symtab[idx].tocptr; + if (size) *size = si->symtab[idx].size; + if (name) *name = (HChar*)si->symtab[idx].name; + if (isText) *isText = si->symtab[idx].isText; +} + + +/*------------------------------------------------------------*/ +/*--- SectKind query functions ---*/ +/*------------------------------------------------------------*/ + +/* Convert a VgSectKind to a string, which must be copied if you want + to change it. */ +const HChar* VG_(pp_SectKind)( VgSectKind kind ) +{ + switch (kind) { + case Vg_SectUnknown: return "Unknown"; + case Vg_SectText: return "Text"; + case Vg_SectData: return "Data"; + case Vg_SectBSS: return "BSS"; + case Vg_SectGOT: return "GOT"; + case Vg_SectPLT: return "PLT"; + case Vg_SectOPD: return "OPD"; + case Vg_SectGOTPLT: return "GOTPLT"; + default: vg_assert(0); + } +} + +/* Given an address 'a', make a guess of which section of which object + it comes from. 
If name is non-NULL, then the last n_name-1 + characters of the object's name is put in name[0 .. n_name-2], and + name[n_name-1] is set to zero (guaranteed zero terminated). */ + +VgSectKind VG_(seginfo_sect_kind)( /*OUT*/UChar* name, SizeT n_name, + Addr a) +{ + DebugInfo* di; + VgSectKind res = Vg_SectUnknown; + + for (di = debugInfo_list; di != NULL; di = di->next) { + + if (0) + VG_(printf)( + "addr=%#lx di=%p %s got=%#lx,%ld plt=%#lx,%ld data=%#lx,%ld bss=%#lx,%ld\n", + a, di, di->filename, + di->got_avma, di->got_size, + di->plt_avma, di->plt_size, + di->data_avma, di->data_size, + di->bss_avma, di->bss_size); + + if (di->text_present + && di->text_size > 0 + && a >= di->text_avma && a < di->text_avma + di->text_size) { + res = Vg_SectText; + break; + } + if (di->data_present + && di->data_size > 0 + && a >= di->data_avma && a < di->data_avma + di->data_size) { + res = Vg_SectData; + break; + } + if (di->sdata_present + && di->sdata_size > 0 + && a >= di->sdata_avma && a < di->sdata_avma + di->sdata_size) { + res = Vg_SectData; + break; + } + if (di->bss_present + && di->bss_size > 0 + && a >= di->bss_avma && a < di->bss_avma + di->bss_size) { + res = Vg_SectBSS; + break; + } + if (di->sbss_present + && di->sbss_size > 0 + && a >= di->sbss_avma && a < di->sbss_avma + di->sbss_size) { + res = Vg_SectBSS; + break; + } + if (di->plt_present + && di->plt_size > 0 + && a >= di->plt_avma && a < di->plt_avma + di->plt_size) { + res = Vg_SectPLT; + break; + } + if (di->got_present + && di->got_size > 0 + && a >= di->got_avma && a < di->got_avma + di->got_size) { + res = Vg_SectGOT; + break; + } + if (di->gotplt_present + && di->gotplt_size > 0 + && a >= di->gotplt_avma && a < di->gotplt_avma + di->gotplt_size) { + res = Vg_SectGOTPLT; + break; + } + if (di->opd_present + && di->opd_size > 0 + && a >= di->opd_avma && a < di->opd_avma + di->opd_size) { + res = Vg_SectOPD; + break; + } + /* we could also check for .eh_frame, if anyone really cares */ + } + + vg_assert( 
(di == NULL && res == Vg_SectUnknown) + || (di != NULL && res != Vg_SectUnknown) ); + + if (name) { + + vg_assert(n_name >= 8); + + if (di && di->filename) { + Int i, j; + Int fnlen = VG_(strlen)(di->filename); + Int start_at = 1 + fnlen - n_name; + if (start_at < 0) start_at = 0; + vg_assert(start_at < fnlen); + i = start_at; j = 0; + while (True) { + vg_assert(j >= 0 && j < n_name); + vg_assert(i >= 0 && i <= fnlen); + name[j] = di->filename[i]; + if (di->filename[i] == 0) break; + i++; j++; + } + vg_assert(i == fnlen); + } else { + VG_(snprintf)(name, n_name, "%s", "???"); + } + + name[n_name-1] = 0; + } + + return res; + +} + + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/misc.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/misc.c.svn-base new file mode 100644 index 0000000..ec35c36 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/misc.c.svn-base @@ -0,0 +1,72 @@ + +/*--------------------------------------------------------------------*/ +/*--- Misc simple stuff lacking a better home. misc.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#include "pub_core_basics.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_mallocfree.h" +#include "pub_core_xarray.h" + +#include "priv_misc.h" /* self */ + + +void* ML_(dinfo_zalloc) ( HChar* cc, SizeT szB ) { + void* v; + vg_assert(szB > 0); + v = VG_(arena_malloc)( VG_AR_DINFO, cc, szB ); + vg_assert(v); + VG_(memset)(v, 0, szB); + return v; +} + +void ML_(dinfo_free) ( void* v ) { + VG_(arena_free)( VG_AR_DINFO, v ); +} + +UChar* ML_(dinfo_strdup) ( HChar* cc, const UChar* str ) { + return VG_(arena_strdup)( VG_AR_DINFO, cc, str ); +} + +UChar* ML_(dinfo_memdup)( HChar* cc, UChar* str, SizeT nStr ) { + UChar* dst = VG_(arena_malloc)( VG_AR_DINFO, cc, nStr ); + tl_assert(dst); + VG_(memcpy)(dst, str, nStr); + return dst; +} + + +/*--------------------------------------------------------------------*/ +/*--- end misc.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_d3basics.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_d3basics.h.svn-base new file mode 100644 index 0000000..1368e0d --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_d3basics.h.svn-base @@ -0,0 +1,651 @@ + +/*--------------------------------------------------------------------*/ +/*--- Basic definitions and helper functions for DWARF3. 
---*/ +/*--- priv_d3basics.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2008-2009 OpenWorks LLP and others; see below + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + ------------- + + Some of this code (DWARF3 enumerations) is taken from FSF's + gdb-6.6/include/elf/dwarf2.h, which is Copyright (C) 1992 to 2006 + Free Software Foundation, Inc and is also GPL-2-or-later. +*/ + +#ifndef __PRIV_D3BASICS_H +#define __PRIV_D3BASICS_H + + +/* This stuff is taken from gdb-6.6/include/elf/dwarf2.h, which is + GPL2+. +*/ +/* Tag names and codes. 
*/ +/* Codes absent from the list below (for example 0x06, 0x07, 0x09, 0x0c, 0x0e, 0x14 and 0x3e) have no enumerator in this table. */ +typedef enum + { + DW_TAG_padding = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + /* DWARF 3. */ + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + DW_TAG_condition = 0x3f, + DW_TAG_shared_type = 0x40, + /* SGI/MIPS Extensions. */ + DW_TAG_MIPS_loop = 0x4081, + /* HP extensions. See: ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz . */ + DW_TAG_HP_array_descriptor = 0x4090, + /* GNU extensions. 
*/ + DW_TAG_format_label = 0x4101, /* For FORTRAN 77 and Fortran 90. */ + DW_TAG_function_template = 0x4102, /* For C++. */ + DW_TAG_class_template = 0x4103, /* For C++. */ + DW_TAG_GNU_BINCL = 0x4104, + DW_TAG_GNU_EINCL = 0x4105, + /* Extensions for UPC. See: http://upc.gwu.edu/~upc. */ + DW_TAG_upc_shared_type = 0x8765, + DW_TAG_upc_strict_type = 0x8766, + DW_TAG_upc_relaxed_type = 0x8767, + /* PGI (STMicroelectronics) extensions. No documentation available. */ + DW_TAG_PGI_kanji_type = 0xA000, + DW_TAG_PGI_interface_block = 0xA020 + } + DW_TAG; + +/* Bounds of the user (vendor extension) tag range. Every extension tag listed above, 0x4081 through 0xA020, lies between these two values. */ +#define DW_TAG_lo_user 0x4080 +#define DW_TAG_hi_user 0xffff + +/* Flag that tells whether entry has a child or not. */ +typedef enum + { + DW_children_no = 0, + DW_children_yes = 1 + } + DW_children; + +/* Source language names and codes. Note the two similarly named UPC entries: DW_LANG_UPC (0x0012, in the DWARF 3 group) and DW_LANG_Upc (0x8765, in the UPC group). */ +typedef enum dwarf_source_language + { + DW_LANG_C89 = 0x0001, + DW_LANG_C = 0x0002, + DW_LANG_Ada83 = 0x0003, + DW_LANG_C_plus_plus = 0x0004, + DW_LANG_Cobol74 = 0x0005, + DW_LANG_Cobol85 = 0x0006, + DW_LANG_Fortran77 = 0x0007, + DW_LANG_Fortran90 = 0x0008, + DW_LANG_Pascal83 = 0x0009, + DW_LANG_Modula2 = 0x000a, + /* DWARF 3. */ + DW_LANG_Java = 0x000b, + DW_LANG_C99 = 0x000c, + DW_LANG_Ada95 = 0x000d, + DW_LANG_Fortran95 = 0x000e, + DW_LANG_PLI = 0x000f, + DW_LANG_ObjC = 0x0010, + DW_LANG_ObjC_plus_plus = 0x0011, + DW_LANG_UPC = 0x0012, + DW_LANG_D = 0x0013, + /* MIPS. */ + DW_LANG_Mips_Assembler = 0x8001, + /* UPC. */ + DW_LANG_Upc = 0x8765 + } + DW_LANG; + +/* Form names and codes. 
*/ +typedef enum + { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16 + } + DW_FORM; + +/* Attribute names and codes. */ +typedef enum + { + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 
0x43, + DW_AT_namelist_items = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + /* DWARF 3 values. */ + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + /* SGI/MIPS extensions. */ + DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + /* HP extensions. */ + DW_AT_HP_block_index = 0x2000, + DW_AT_HP_unmodifiable = 0x2001, /* Same as DW_AT_MIPS_fde. 
*/ + DW_AT_HP_actuals_stmt_list = 0x2010, + DW_AT_HP_proc_per_section = 0x2011, + DW_AT_HP_raw_data_ptr = 0x2012, + DW_AT_HP_pass_by_reference = 0x2013, + DW_AT_HP_opt_level = 0x2014, + DW_AT_HP_prof_version_id = 0x2015, + DW_AT_HP_opt_flags = 0x2016, + DW_AT_HP_cold_region_low_pc = 0x2017, + DW_AT_HP_cold_region_high_pc = 0x2018, + DW_AT_HP_all_variables_modifiable = 0x2019, + DW_AT_HP_linkage_name = 0x201a, + DW_AT_HP_prof_flags = 0x201b, /* In comp unit of procs_info for -g. */ + /* GNU extensions. */ + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + /* VMS extensions. */ + DW_AT_VMS_rtnbeg_pd_address = 0x2201, + /* UPC extension. */ + DW_AT_upc_threads_scaled = 0x3210, + /* PGI (STMicroelectronics) extensions. */ + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02 + } + DW_AT; + +/* NOTE(review): the DWARF specification appears to give DW_AT_hi_user as 0x3fff rather than the 0x3ff0 below; confirm against the standard before relying on it as a range bound. Every extension attribute listed above is at most 0x3a02, so it lies inside the range either way. */ +#define DW_AT_lo_user 0x2000 /* Implementation-defined range start. */ +#define DW_AT_hi_user 0x3ff0 /* Implementation-defined range end. */ + +/* Type encodings. */ +typedef enum + { + DW_ATE_void = 0x0, + DW_ATE_address = 0x1, + DW_ATE_boolean = 0x2, + DW_ATE_complex_float = 0x3, + DW_ATE_float = 0x4, + DW_ATE_signed = 0x5, + DW_ATE_signed_char = 0x6, + DW_ATE_unsigned = 0x7, + DW_ATE_unsigned_char = 0x8, + /* DWARF 3. */ + DW_ATE_imaginary_float = 0x9, + DW_ATE_packed_decimal = 0xa, + DW_ATE_numeric_string = 0xb, + DW_ATE_edited = 0xc, + DW_ATE_signed_fixed = 0xd, + DW_ATE_unsigned_fixed = 0xe, + DW_ATE_decimal_float = 0xf, + /* HP extensions. */ + DW_ATE_HP_float80 = 0x80, /* Floating-point (80 bit). */ + DW_ATE_HP_complex_float80 = 0x81, /* Complex floating-point (80 bit). */ + DW_ATE_HP_float128 = 0x82, /* Floating-point (128 bit). */ + DW_ATE_HP_complex_float128 = 0x83, /* Complex floating-point (128 bit). */ + DW_ATE_HP_floathpintel = 0x84, /* Floating-point (82 bit IA64). 
*/ + DW_ATE_HP_imaginary_float80 = 0x85, + DW_ATE_HP_imaginary_float128 = 0x86 + } + DW_ATE; + + +/* Expression operations. */ +typedef enum + { + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_skip = 0x2f, + DW_OP_lit0 = 0x30, + DW_OP_lit1 = 0x31, + DW_OP_lit2 = 0x32, + DW_OP_lit3 = 0x33, + DW_OP_lit4 = 0x34, + DW_OP_lit5 = 0x35, + DW_OP_lit6 = 0x36, + DW_OP_lit7 = 0x37, + DW_OP_lit8 = 0x38, + DW_OP_lit9 = 0x39, + DW_OP_lit10 = 0x3a, + DW_OP_lit11 = 0x3b, + DW_OP_lit12 = 0x3c, + DW_OP_lit13 = 0x3d, + DW_OP_lit14 = 0x3e, + DW_OP_lit15 = 0x3f, + DW_OP_lit16 = 0x40, + DW_OP_lit17 = 0x41, + DW_OP_lit18 = 0x42, + DW_OP_lit19 = 0x43, + DW_OP_lit20 = 0x44, + DW_OP_lit21 = 0x45, + DW_OP_lit22 = 0x46, + DW_OP_lit23 = 0x47, + DW_OP_lit24 = 0x48, + DW_OP_lit25 = 0x49, + DW_OP_lit26 = 0x4a, + DW_OP_lit27 = 0x4b, + DW_OP_lit28 = 0x4c, + DW_OP_lit29 = 0x4d, + DW_OP_lit30 = 0x4e, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg1 = 0x51, + DW_OP_reg2 = 0x52, + DW_OP_reg3 = 0x53, + DW_OP_reg4 = 0x54, + DW_OP_reg5 = 0x55, + DW_OP_reg6 = 0x56, + DW_OP_reg7 = 0x57, + DW_OP_reg8 = 0x58, + DW_OP_reg9 = 0x59, + DW_OP_reg10 = 0x5a, + DW_OP_reg11 = 0x5b, + 
DW_OP_reg12 = 0x5c, + DW_OP_reg13 = 0x5d, + DW_OP_reg14 = 0x5e, + DW_OP_reg15 = 0x5f, + DW_OP_reg16 = 0x60, + DW_OP_reg17 = 0x61, + DW_OP_reg18 = 0x62, + DW_OP_reg19 = 0x63, + DW_OP_reg20 = 0x64, + DW_OP_reg21 = 0x65, + DW_OP_reg22 = 0x66, + DW_OP_reg23 = 0x67, + DW_OP_reg24 = 0x68, + DW_OP_reg25 = 0x69, + DW_OP_reg26 = 0x6a, + DW_OP_reg27 = 0x6b, + DW_OP_reg28 = 0x6c, + DW_OP_reg29 = 0x6d, + DW_OP_reg30 = 0x6e, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg1 = 0x71, + DW_OP_breg2 = 0x72, + DW_OP_breg3 = 0x73, + DW_OP_breg4 = 0x74, + DW_OP_breg5 = 0x75, + DW_OP_breg6 = 0x76, + DW_OP_breg7 = 0x77, + DW_OP_breg8 = 0x78, + DW_OP_breg9 = 0x79, + DW_OP_breg10 = 0x7a, + DW_OP_breg11 = 0x7b, + DW_OP_breg12 = 0x7c, + DW_OP_breg13 = 0x7d, + DW_OP_breg14 = 0x7e, + DW_OP_breg15 = 0x7f, + DW_OP_breg16 = 0x80, + DW_OP_breg17 = 0x81, + DW_OP_breg18 = 0x82, + DW_OP_breg19 = 0x83, + DW_OP_breg20 = 0x84, + DW_OP_breg21 = 0x85, + DW_OP_breg22 = 0x86, + DW_OP_breg23 = 0x87, + DW_OP_breg24 = 0x88, + DW_OP_breg25 = 0x89, + DW_OP_breg26 = 0x8a, + DW_OP_breg27 = 0x8b, + DW_OP_breg28 = 0x8c, + DW_OP_breg29 = 0x8d, + DW_OP_breg30 = 0x8e, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + /* DWARF 3 extensions. */ + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, + /* GNU extensions. */ + DW_OP_GNU_push_tls_address = 0xe0, + /* HP extensions. */ + DW_OP_HP_unknown = 0xe0, /* Ouch, the same as GNU_push_tls_address. 
*/ + DW_OP_HP_is_value = 0xe1, + DW_OP_HP_fltconst4 = 0xe2, + DW_OP_HP_fltconst8 = 0xe3, + DW_OP_HP_mod_range = 0xe4, + DW_OP_HP_unmod_range = 0xe5, + DW_OP_HP_tls = 0xe6 + } + DW_OP; + +/* NOTE(review): presumably these return a printable name for the given code; the implementations are not in this header, so confirm before relying on ownership or lifetime of the returned string. */ +HChar* ML_(pp_DW_children) ( DW_children hashch ); +HChar* ML_(pp_DW_TAG) ( DW_TAG tag ); +HChar* ML_(pp_DW_FORM) ( DW_FORM form ); +HChar* ML_(pp_DW_AT) ( DW_AT attr ); + + +/* --- To do with evaluation of Dwarf expressions --- */ + +/* Guarded Dwarf3 expressions, which can be linked together to form a + list. The payload field contains a variable length array of bytes + which hold the guarded expressions. The length can be inferred by + inspecting the payload bytes and so does not need to be stored + explicitly. + + Guarded-Expression format is similar but not identical to the + DWARF3 location-list format. The format of each returned block is: + + UChar biasMe; + UChar isEnd; + followed by zero or more of + + (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) + + '..bytes..' is a standard DWARF3 location expression which is + valid when aMin <= pc <= aMax (possibly after suitable biasing). + + The number of bytes in '..bytes..' is nbytes. + + The end of the sequence is marked by an isEnd == 1 value. All + previous isEnd values must be zero. + + biasMe is 1 if the aMin/aMax fields need this DebugInfo's text_bias + added before use, and 0 if this is not necessary (the GX is + ready to go). + + Hence the block can be quickly parsed and is self-describing. Note + that aMax is 1 less than the corresponding value in a DWARF3 + location list. Zero length ranges, with aMax == aMin-1, are not + allowed. 
+*/ +typedef + struct _GExpr { + /* Zero-length array: a GExpr has no fixed-size fields, only the variable-length byte payload whose layout is given in the comment above. */ + UChar payload[0]; + } + GExpr; + +/* Show a so-called guarded expression */ +void ML_(pp_GX) ( GExpr* gx ); + +/* Evaluation of a DWARF3 expression (and hence of a GExpr) may + require knowing a suitably contextualising set of values for the + instruction, frame and stack pointers (and, in general, all + registers, though we punt on such generality here). Here's a + struct to carry the bare essentials. ip, fp and sp are expected to + be provided for all platforms. */ +typedef + struct { Addr ip; Addr sp; Addr fp; } + RegSummary; + +/* This describes the result of evaluating a DWARF3 expression. + GXR_Failure: failed; .word is an asciiz string summarising why + GXR_Value: evaluated to a value, in .word + GXR_RegNo: evaluated to a DWARF3 register number, in .word + NOTE(review): in the GXR_Failure case .word presumably holds the + address of the string, stored as a UWord - confirm against the + evaluator. +*/ +typedef + struct { + enum { GXR_Failure, GXR_Value, GXR_RegNo } kind; + UWord word; + } + GXResult; + +/* NOTE(review): presumably prints res for debugging, like the other pp_ functions; the implementation is not in this header. */ +void ML_(pp_GXResult) ( GXResult res ); + +/* Evaluate a guarded expression. If regs is NULL, then gx is assumed + (and checked) to contain just a single guarded expression, with a + guard which covers the entire address space and so always evaluates + to True (iow, gx is a single unconditional expression). If regs is + non-NULL then its .ip value is used to select which of the + embedded DWARF3 location expressions to use, and that is duly + evaluated. + + If as part of the evaluation, a frame base value needs to be + computed, then fbGX can provide an expression for it. If fbGX is + NULL but the frame base is still needed, then evaluation of gx as a + whole will fail. */ +GXResult ML_(evaluate_GX)( GExpr* gx, GExpr* fbGX, + RegSummary* regs, const DebugInfo* di ); + +/* This is a subsidiary of ML_(evaluate_GX), which just evaluates a + single standard DWARF3 expression. Conventions w.r.t regs and fbGX + are as for ML_(evaluate_GX). If push_initial_zero is True, then an + initial zero word is pushed on the evaluation stack at the start. 
+ This is needed for computing structure field offsets. Note that + ML_(evaluate_GX) and ML_(evaluate_Dwarf3_Expr) are mutually + recursive. NOTE(review): expr and exprszB presumably give the + start of the expression bytes and their count - confirm against + the implementation. */ +GXResult ML_(evaluate_Dwarf3_Expr) ( UChar* expr, UWord exprszB, + GExpr* fbGX, RegSummary* regs, + const DebugInfo* di, + Bool push_initial_zero ); + +/* Evaluate a very simple Guarded (DWARF3) expression. The expression + is expected to denote a constant, with no reference to any + registers nor to any frame base expression. GXR_Failure is + returned if there is more than one guard, or none, a register + location is denoted, a frame base expression is required, or the + expression is not manifestly a constant. The range of addresses + covered by the guard is also ignored. */ +GXResult ML_(evaluate_trivial_GX)( GExpr* gx, const DebugInfo* di ); + +#endif /* ndef __PRIV_D3BASICS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end priv_d3basics.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_misc.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_misc.h.svn-base new file mode 100644 index 0000000..fd786a5 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_misc.h.svn-base @@ -0,0 +1,55 @@ + +/*--------------------------------------------------------------------*/ +/*--- Misc simple stuff lacking a better home. priv_misc.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#ifndef __PRIV_MISC_H +#define __PRIV_MISC_H + + +/* Allocate(zeroed), free, strdup, memdup, all in VG_AR_DINFO. In + each case cc is handed straight to the arena allocator. As + implemented in misc.c: dinfo_zalloc asserts that szB > 0 and that + the allocation succeeded, and zeroes the block; dinfo_memdup + copies exactly nStr bytes and neither zeroes nor terminates + anything. */ +void* ML_(dinfo_zalloc)( HChar* cc, SizeT szB ); +void ML_(dinfo_free)( void* v ); +UChar* ML_(dinfo_strdup)( HChar* cc, const UChar* str ); +UChar* ML_(dinfo_memdup)( HChar* cc, UChar* str, SizeT nStr ); + +/* A handy type, a la Haskell's Maybe type. Yes, I know, C sucks. + Been there. Done that. Seen the movie. Got the T-shirt. Etc. + NOTE(review): b presumably says whether ul holds a valid value - + confirm against the producers of this type. */ +typedef struct { ULong ul; Bool b; } MaybeULong; + + +#endif /* ndef __PRIV_MISC_H */ + +/*--------------------------------------------------------------------*/ +/*--- end priv_misc.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_readdwarf.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_readdwarf.h.svn-base new file mode 100644 index 0000000..dc9fb99 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_readdwarf.h.svn-base @@ -0,0 +1,72 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read DWARF1/2/3 debug info. 
priv_readdwarf.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READDWARF_H +#define __PRIV_READDWARF_H + +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. 
+*/ + + +/* -------------------- + DWARF3 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf3) + ( struct _DebugInfo* di, + UChar* debug_info_img, Word debug_info_sz, /* .debug_info */ + UChar* debug_abbv_img, Word debug_abbv_sz, /* .debug_abbrev */ + UChar* debug_line_img, Word debug_line_sz, /* .debug_line */ + UChar* debug_str_img, Word debug_str_sz ); /* .debug_str */ + +/* -------------------- + DWARF1 reader + -------------------- */ +extern +void ML_(read_debuginfo_dwarf1) ( struct _DebugInfo* di, + UChar* dwarf1d, Int dwarf1d_sz, + UChar* dwarf1l, Int dwarf1l_sz ); + +/* -------------------- + CFI reader + -------------------- */ +extern +void ML_(read_callframe_info_dwarf3) + ( /*OUT*/struct _DebugInfo* di, UChar* ehframe ); + + +#endif /* ndef __PRIV_READDWARF_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_readdwarf3.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_readdwarf3.h.svn-base new file mode 100644 index 0000000..2ffb2f4 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_readdwarf3.h.svn-base @@ -0,0 +1,57 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read DWARF3 ".debug_info" sections (DIE trees). ---*/ +/*--- priv_readdwarf3.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#ifndef __PRIV_READDWARF3_H +#define __PRIV_READDWARF3_H + + +/* Read DWARF3 ".debug_info" sections. */ +void +ML_(new_dwarf3_reader) ( + struct _DebugInfo* di, + UChar* debug_info_img, SizeT debug_info_sz, + UChar* debug_abbv_img, SizeT debug_abbv_sz, + UChar* debug_line_img, SizeT debug_line_sz, + UChar* debug_str_img, SizeT debug_str_sz, + UChar* debug_ranges_img, SizeT debug_ranges_sz, + UChar* debug_loc_img, SizeT debug_loc_sz +); + +#endif /* ndef __PRIV_READDWARF3_H */ + +/*--------------------------------------------------------------------*/ +/*--- end priv_readdwarf3.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_readelf.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_readelf.h.svn-base new file mode 100644 index 0000000..e0955c4 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_readelf.h.svn-base @@ -0,0 +1,58 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reading of syms & debug info from ELF .so/executable files. 
---*/ +/*--- priv_readelf.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READELF_H +#define __PRIV_READELF_H + +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +/* Identify an ELF object file by peering at the first few bytes of + it. */ +extern Bool ML_(is_elf_object_file)( void* image, SizeT n_image ); + +/* The central function for reading ELF debug info. For the + object/exe specified by the SegInfo, find ELF sections, then read + the symbols, line number info, file name info, CFA (stack-unwind + info) and anything else we want, into the tables within the + supplied SegInfo. 
+*/ +extern Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ); + + +#endif /* ndef __PRIV_READELF_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_readpdb.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_readpdb.h.svn-base new file mode 100644 index 0000000..eb88017 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_readpdb.h.svn-base @@ -0,0 +1,53 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reading of syms & debug info from PDB-format files. ---*/ +/*--- priv_readpdb.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + Spring 2008: + derived from readelf.c and valgrind-20031012-wine/vg_symtab2.c + derived from wine-1.0/tools/winedump/pdb.c and msc.c + + Copyright (C) 2000-2008 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READPDB_H +#define __PRIV_READPDB_H + +/* Returns True if OK, False for any kind of failure. 
*/ +extern Bool ML_(read_pdb_debug_info)( + DebugInfo* di, + Addr obj_avma, + PtrdiffT unknown_purpose__reloc, + void* pdbimage, + SizeT n_pdbimage, + Char* pdbname, + ULong pdbmtime + ); + +#endif /* ndef __PRIV_READPDB_H */ + +/*--------------------------------------------------------------------*/ +/*--- end priv_readpdb.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_readstabs.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_readstabs.h.svn-base new file mode 100644 index 0000000..c7a42f0 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_readstabs.h.svn-base @@ -0,0 +1,52 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read 'stabs' format debug info. priv_readstabs.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __PRIV_READSTABS_H +#define __PRIV_READSTABS_H + +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. 
+ This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +/* -------------------- + Stabs reader + -------------------- */ +extern +void ML_(read_debuginfo_stabs) ( struct _DebugInfo* di, + UChar* stabC, Int stab_sz, + UChar* stabstr, Int stabstr_sz ); + +#endif /* ndef __PRIV_READSTABS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_readxcoff.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_readxcoff.h.svn-base new file mode 100644 index 0000000..55e6de8 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_readxcoff.h.svn-base @@ -0,0 +1,49 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read XCOFF format debug info. priv_readxcoff.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2006-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. 
Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#ifndef __PRIV_READXCOFF_H +#define __PRIV_READXCOFF_H + + +/* Read whatever info we can from an XCOFF object file. */ +extern +Bool ML_(read_xcoff_debug_info) ( struct _DebugInfo* di, + Bool is_mainexe ); + +#endif /* ndef __PRIV_READXCOFF_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_storage.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_storage.h.svn-base new file mode 100644 index 0000000..50abe79 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_storage.h.svn-base @@ -0,0 +1,684 @@ + +/*--------------------------------------------------------------------*/ +/*--- Format-neutral storage of and querying of info acquired from ---*/ +/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ +/*--- priv_storage.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ +/* See comment at top of debuginfo.c for explanation of + the _svma / _avma / _image / _bias naming scheme. +*/ +/* Note this is not freestanding; needs pub_core_xarray.h and + priv_tytypes.h to be included before it. */ + +#ifndef __PRIV_STORAGE_H +#define __PRIV_STORAGE_H + +/* --------------------- SYMBOLS --------------------- */ + +/* A structure to hold an ELF/XCOFF symbol (very crudely). */ +typedef + struct { + Addr addr; /* lowest address of entity */ + Addr tocptr; /* ppc64-linux only: value that R2 should have */ + UChar *name; /* name */ + UInt size; /* size in bytes */ + Bool isText; + } + DiSym; + +/* --------------------- SRCLOCS --------------------- */ + +/* Line count at which overflow happens, due to line numbers being + stored as shorts in `struct nlist' in a.out.h. */ +#define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) + +#define LINENO_BITS 20 +#define LOC_SIZE_BITS (32 - LINENO_BITS) +#define MAX_LINENO ((1 << LINENO_BITS) - 1) + +/* Unlikely to have any lines with instruction ranges > 4096 bytes */ +#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) + +/* Number used to detect line number overflows; if one line is + 60000-odd smaller than the previous, it was probably an overflow. + */ +#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) + +/* A structure to hold addr-to-source info for a single line. There + can be a lot of these, hence the dense packing. 
*/ +typedef + struct { + /* Word 1 */ + Addr addr; /* lowest address for this line */ + /* Word 2 */ + UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */ + UInt lineno:LINENO_BITS; /* source line number, or zero */ + /* Word 3 */ + UChar* filename; /* source filename */ + /* Word 4 */ + UChar* dirname; /* source directory name */ + } + DiLoc; + +/* --------------------- CF INFO --------------------- */ + +/* A structure to summarise DWARF2/3 CFA info for the code address + range [base .. base+len-1]. In short, if you know (sp,fp,ip) at + some point and ip is in the range [base .. base+len-1], it tells + you how to calculate (sp,fp) for the caller of the current frame + and also ra, the return address of the current frame. + + First off, calculate CFA, the Canonical Frame Address, thusly: + + cfa = case cfa_how of + CFIC_SPREL -> sp + cfa_off + CFIC_FPREL -> fp + cfa_off + CFIC_EXPR -> expr whose index is in cfa_off + + Once that is done, the previous frame's sp/fp values and this + frame's ra value can be calculated like this: + + old_sp/fp/ra + = case sp/fp/ra_how of + CFIR_UNKNOWN -> we don't know, sorry + CFIR_SAME -> same as it was before (sp/fp only) + CFIR_CFAREL -> cfa + sp/fp/ra_off + CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) + CFIR_EXPR -> expr whose index is in sp/fp/ra_off +*/ + +#define CFIC_SPREL ((UChar)1) +#define CFIC_FPREL ((UChar)2) +#define CFIC_EXPR ((UChar)3) + +#define CFIR_UNKNOWN ((UChar)4) +#define CFIR_SAME ((UChar)5) +#define CFIR_CFAREL ((UChar)6) +#define CFIR_MEMCFAREL ((UChar)7) +#define CFIR_EXPR ((UChar)8) + +typedef + struct { + Addr base; + UInt len; + UChar cfa_how; /* a CFIC_ value */ + UChar ra_how; /* a CFIR_ value */ + UChar sp_how; /* a CFIR_ value */ + UChar fp_how; /* a CFIR_ value */ + Int cfa_off; + Int ra_off; + Int sp_off; + Int fp_off; + } + DiCfSI; + + +typedef + enum { + Cop_Add=0x321, + Cop_Sub, + Cop_And, + Cop_Mul + } + CfiOp; + +typedef + enum { + Creg_SP=0x213, + Creg_FP, + Creg_IP + } + 
CfiReg; + +typedef + enum { + Cex_Undef=0x123, + Cex_Deref, + Cex_Const, + Cex_Binop, + Cex_CfiReg, + Cex_DwReg + } + CfiExprTag; + +typedef + struct { + CfiExprTag tag; + union { + struct { + } Undef; + struct { + Int ixAddr; + } Deref; + struct { + UWord con; + } Const; + struct { + CfiOp op; + Int ixL; + Int ixR; + } Binop; + struct { + CfiReg reg; + } CfiReg; + struct { + Int reg; + } DwReg; + } + Cex; + } + CfiExpr; + +extern Int ML_(CfiExpr_Undef) ( XArray* dst ); +extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr ); +extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con ); +extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR ); +extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ); +extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg ); + +extern void ML_(ppCfiExpr)( XArray* src, Int ix ); + +/* ---------------- FPO INFO (Windows PE) -------------- */ + +/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like + a primitive CFI */ +typedef + struct _FPO_DATA { /* 16 bytes */ + UInt ulOffStart; /* offset of 1st byte of function code */ + UInt cbProcSize; /* # bytes in function */ + UInt cdwLocals; /* # bytes/4 in locals */ + UShort cdwParams; /* # bytes/4 in params */ + UChar cbProlog; /* # bytes in prolog */ + UChar cbRegs :3; /* # regs saved */ + UChar fHasSEH:1; /* Structured Exception Handling */ + UChar fUseBP :1; /* EBP has been used */ + UChar reserved:1; + UChar cbFrame:2; /* frame type */ + } + FPO_DATA; + +#define PDB_FRAME_FPO 0 +#define PDB_FRAME_TRAP 1 +#define PDB_FRAME_TSS 2 + +/* --------------------- VARIABLES --------------------- */ + +typedef + struct { + Addr aMin; + Addr aMax; + XArray* /* of DiVariable */ vars; + } + DiAddrRange; + +typedef + struct { + UChar* name; /* in DebugInfo.strchunks */ + UWord typeR; /* a cuOff */ + GExpr* gexpr; /* on DebugInfo.gexprs list */ + GExpr* fbGX; /* SHARED. */ + UChar* fileName; /* where declared; may be NULL. 
in + DebugInfo.strchunks */ + Int lineNo; /* where declared; may be zero. */ + } + DiVariable; + +Word +ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); + +/* --------------------- DEBUGINFO --------------------- */ + +/* This is the top-level data type. It's a structure which contains + information pertaining to one mapped ELF object. This type is + exported only abstractly - in pub_tool_debuginfo.h. */ + +#define SEGINFO_STRCHUNKSIZE (64*1024) + +struct _DebugInfo { + + /* Admin stuff */ + + struct _DebugInfo* next; /* list of DebugInfos */ + Bool mark; /* marked for deletion? */ + + /* An abstract handle, which can be used by entities outside of + m_debuginfo to (in an abstract datatype sense) refer to this + struct _DebugInfo. A .handle of zero is invalid; valid handles + are 1 and above. The same handle is never issued twice (in any + given run of Valgrind), so a handle becomes invalid when the + associated struct _DebugInfo is discarded, and remains invalid + forever thereafter. The .handle field is set as soon as this + structure is allocated. */ + ULong handle; + + /* Used for debugging only - indicate what stuff to dump whilst + reading stuff into the seginfo. Are computed as early in the + lifetime of the DebugInfo as possible -- at the point when it is + created. Use these when deciding what to spew out; do not use + the global VG_(clo_blah) flags. */ + + Bool trace_symtab; /* symbols, our style */ + Bool trace_cfi; /* dwarf frame unwind, our style */ + Bool ddump_syms; /* mimic /usr/bin/readelf --syms */ + Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */ + Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */ + + /* Fields that must be filled in before we can start reading + anything from the ELF file. These fields are filled in by + VG_(di_notify_mmap) and its immediate helpers. */ + + UChar* filename; /* in mallocville (VG_AR_DINFO) */ + UChar* memname; /* also in VG_AR_DINFO. 
AIX5 only: .a member name */ + + Bool have_rx_map; /* did we see a r?x mapping yet for the file? */ + Bool have_rw_map; /* did we see a rw? mapping yet for the file? */ + + Addr rx_map_avma; /* these fields record the file offset, length */ + SizeT rx_map_size; /* and map address of the r?x mapping we believe */ + OffT rx_map_foff; /* is the .text segment mapping */ + + Addr rw_map_avma; /* ditto, for the rw? mapping we believe is the */ + SizeT rw_map_size; /* .data segment mapping */ + OffT rw_map_foff; + + /* Once both a rw? and r?x mapping for .filename have been + observed, we can go on to read the symbol tables and debug info. + .have_dinfo flags when that has happened. */ + /* If have_dinfo is False, then all fields except "*rx_map*" and + "*rw_map*" are invalid and should not be consulted. */ + Bool have_dinfo; /* initially False */ + + /* All the rest of the fields in this structure are filled in once + we have committed to reading the symbols and debug info (that + is, at the point where .have_dinfo is set to True). */ + + /* The file's soname. FIXME: ensure this is always allocated in + VG_AR_DINFO. */ + UChar* soname; + + /* Description of some important mapped segments. The presence or + absence of the mapping is denoted by the _present field, since + in some obscure circumstances (to do with data/sdata/bss) it is + possible for the mapping to be present but have zero size. + Certainly text_ is mandatory on all platforms; not sure about + the rest though. 
+ + -------------------------------------------------------- + + Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that + + either (rx_map_size == 0 && cfsi == NULL) (the degenerate case) + + or the normal case, which is the AND of the following: + (0) rx_map_size > 0 + (1) no two DebugInfos with rx_map_size > 0 + have overlapping [rx_map_avma,+rx_map_size) + (2) [cfsi_minavma,cfsi_maxavma] does not extend + beyond [rx_map_avma,+rx_map_size); that is, the former is a + subrange or equal to the latter. + (3) all DiCfSI in the cfsi array all have ranges that fall within + [rx_map_avma,+rx_map_size). + (4) all DiCfSI in the cfsi array are non-overlapping + + The cumulative effect of these restrictions is to ensure that + all the DiCfSI records in the entire system are non overlapping. + Hence any address falls into either exactly one DiCfSI record, + or none. Hence it is safe to cache the results of searches for + DiCfSI records. This is the whole point of these restrictions. + The caching of DiCfSI searches is done in VG_(use_CF_info). The + cache is flushed after any change to debugInfo_list. DiCfSI + searches are cached because they are central to stack unwinding + on amd64-linux. + + Where are these invariants imposed and checked? + + They are checked after a successful read of debuginfo into + a DebugInfo*, in check_CFSI_related_invariants. + + (1) is not really imposed anywhere. We simply assume that the + kernel will not map the text segments from two different objects + into the same space. Sounds reasonable. + + (2) follows from (4) and (3). It is ensured by canonicaliseCFI. + (3) is ensured by ML_(addDiCfSI). + (4) is ensured by canonicaliseCFI. + + -------------------------------------------------------- + + Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields: + + The _debug_{svma,bias} fields were added as part of a fix to + #185816. 
The problem encompassed in that bug report was that it + wasn't correct to apply the bias values deduced for a + primary object to its associated debuginfo object, because the + debuginfo object (or the primary) could have been prelinked to a + different SVMA. Hence debuginfo and primary objects need to + have their own biases. + + ------ JRS: (referring to r9329): ------ + Let me see if I understand the workings correctly. Initially + the _debug_ values are set to the same values as the "normal" + ones, as there's a bunch of bits of code like this (in + readelf.c) + + di->text_svma = svma; + ... + di->text_bias = rx_bias; + di->text_debug_svma = svma; + di->text_debug_bias = rx_bias; + + If a debuginfo object subsequently shows up then the + _debug_svma/bias are set for the debuginfo object. Result is + that if there's no debuginfo object then the values are the same + as the primary-object values, and if there is a debuginfo object + then they will (or at least may) be different. + + Then when we need to actually bias something, we'll have to + decide whether to use the primary bias or the debuginfo bias. + And the strategy is to use the primary bias for ELF symbols but + the debuginfo bias for anything pulled out of Dwarf. + + ------ THH: ------ + Correct - the debug_svma and bias values apply to any address + read from the debug data regardless of where that debug data is + stored and the other values are used for addresses from other + places (primarily the symbol table). + + ------ JRS: ------ + Ok; so this was my only area of concern. Are there any + corner-case scenarios where this wouldn't be right? It sounds + like we're assuming the ELF symbols come from the primary object + and, if there is a debug object, then all the Dwarf comes from + there. But what if (eg) both symbols and Dwarf come from the + debug object? Is that even possible or allowable? + + ------ THH: ------ + You may have a point... 
+ + The current logic is to try and take any one set of data from + either the base object or the debug object. There are four sets + of data we consider: + + - Symbol Table + - Stabs + - DWARF1 + - DWARF2 + + If we see the primary section for a given set in the base object + then we ignore all sections relating to that set in the debug + object. + + Now in principle if we saw a secondary section (like debug_line + say) in the base object, but not the main section (debug_info in + this case) then we would take debug_info from the debug object + but would use the debug_line from the base object unless we saw + a replacement copy in the debug object. That's probably unlikely + however. + + A bigger issue might be, as you say, the symbol table as we will + pick that up from the debug object if it isn't in the base. The + dynamic symbol table will always have to be in the base object + though so we will have to be careful when processing symbols to + know which table we are reading in that case. + + What we probably need to do is tell read_elf_symtab which object + the symbols it is being asked to read came from. + + (A followup patch to deal with this was committed in r9469). 
+ */ + /* .text */ + Bool text_present; + Addr text_avma; + Addr text_svma; + SizeT text_size; + PtrdiffT text_bias; + Addr text_debug_svma; + PtrdiffT text_debug_bias; + /* .data */ + Bool data_present; + Addr data_svma; + Addr data_avma; + SizeT data_size; + PtrdiffT data_bias; + Addr data_debug_svma; + PtrdiffT data_debug_bias; + /* .sdata */ + Bool sdata_present; + Addr sdata_svma; + Addr sdata_avma; + SizeT sdata_size; + PtrdiffT sdata_bias; + Addr sdata_debug_svma; + PtrdiffT sdata_debug_bias; + /* .rodata */ + Bool rodata_present; + Addr rodata_svma; + Addr rodata_avma; + SizeT rodata_size; + PtrdiffT rodata_bias; + Addr rodata_debug_svma; + PtrdiffT rodata_debug_bias; + /* .bss */ + Bool bss_present; + Addr bss_svma; + Addr bss_avma; + SizeT bss_size; + PtrdiffT bss_bias; + Addr bss_debug_svma; + PtrdiffT bss_debug_bias; + /* .sbss */ + Bool sbss_present; + Addr sbss_svma; + Addr sbss_avma; + SizeT sbss_size; + PtrdiffT sbss_bias; + Addr sbss_debug_svma; + PtrdiffT sbss_debug_bias; + /* .plt */ + Bool plt_present; + Addr plt_avma; + SizeT plt_size; + /* .got */ + Bool got_present; + Addr got_avma; + SizeT got_size; + /* .got.plt */ + Bool gotplt_present; + Addr gotplt_avma; + SizeT gotplt_size; + /* .opd -- needed on ppc64-linux for finding symbols */ + Bool opd_present; + Addr opd_avma; + SizeT opd_size; + /* .ehframe -- needed on amd64-linux for stack unwinding */ + Bool ehframe_present; + Addr ehframe_avma; + SizeT ehframe_size; + + /* Sorted tables of stuff we snarfed from the file. This is the + eventual product of reading the debug info. All this stuff + lives in VG_AR_DINFO. */ + + /* An expandable array of symbols. */ + DiSym* symtab; + UWord symtab_used; + UWord symtab_size; + /* An expandable array of locations. */ + DiLoc* loctab; + UWord loctab_used; + UWord loctab_size; + /* An expandable array of CFI summary info records. 
Also includes + summary address bounds, showing the min and max address covered + by any of the records, as an aid to fast searching. And, if the + records require any expression nodes, they are stored in + cfsi_exprs. */ + DiCfSI* cfsi; + UWord cfsi_used; + UWord cfsi_size; + Addr cfsi_minavma; + Addr cfsi_maxavma; + XArray* cfsi_exprs; /* XArray of CfiExpr */ + + /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted + data. Non-expandable array, hence .size == .used. */ + FPO_DATA* fpo; + UWord fpo_size; + Addr fpo_minavma; + Addr fpo_maxavma; + + /* Expandable arrays of characters -- the string table. Pointers + into this are stable (the arrays are not reallocated). */ + struct strchunk { + UInt strtab_used; + struct strchunk* next; + UChar strtab[SEGINFO_STRCHUNKSIZE]; + } *strchunks; + + /* Variable scope information, as harvested from Dwarf3 files. + + In short it's an + + array of (array of PC address ranges and variables) + + The outer array indexes over scopes, with Entry 0 containing + information on variables which exist for any value of the program + counter (PC) -- that is, the outermost scope. Entries 1, 2, 3, + etc contain information on increasingly deeply nested variables. + + Each inner array is an array of (an address range, and a set + of variables that are in scope over that address range). + + The address ranges may not overlap. + + Since Entry 0 in the outer array holds information on variables + that exist for any value of the PC (that is, global vars), it + follows that Entry 0's inner array can only have one address + range pair, one that covers the entire address space. + */ + XArray* /* of OSet of DiAddrRange */varinfo; + + /* These are arrays of the relevant typed objects, held here + partially for the purposes of visiting each object exactly once + when we need to delete them. */ + + /* An array of TyEnts. These are needed to make sense of any types + in the .varinfo. 
Also, when deleting this DebugInfo, we must + first traverse this array and throw away malloc'd stuff hanging + off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */ + XArray* /* of TyEnt */ admin_tyents; + + /* An array of guarded DWARF3 expressions. */ + XArray* admin_gexprs; +}; + +/* --------------------- functions --------------------- */ + +/* ------ Adding ------ */ + +/* Add a symbol to si's symbol table. */ +extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); + +/* Add a line-number record to a DebugInfo. */ +extern +void ML_(addLineInfo) ( struct _DebugInfo* di, + UChar* filename, + UChar* dirname, /* NULL is allowable */ + Addr this, Addr next, Int lineno, Int entry); + +/* Add a CFI summary record. The supplied DiCfSI is copied. */ +extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi ); + +/* Add a string to the string table of a DebugInfo. If len==-1, + ML_(addStr) will itself measure the length of the string. */ +extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len ); + +extern void ML_(addVar)( struct _DebugInfo* di, + Int level, + Addr aMin, + Addr aMax, + UChar* name, + UWord typeR, /* a cuOff */ + GExpr* gexpr, + GExpr* fbGX, /* SHARED. */ + UChar* fileName, /* where decl'd - may be NULL */ + Int lineNo, /* where decl'd - may be zero */ + Bool show ); + +/* Canonicalise the tables held by 'di', in preparation for use. Call + this after finishing adding entries to these tables. */ +extern void ML_(canonicaliseTables) ( struct _DebugInfo* di ); + +/* ------ Searching ------ */ + +/* Find a symbol-table index containing the specified pointer, or -1 + if not found. Binary search. */ +extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr, + Bool match_anywhere_in_sym, + Bool findText ); + +/* Find a location-table index containing the specified pointer, or -1 + if not found. Binary search. 
*/ +extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr ); + +/* Find a CFI-table index containing the specified pointer, or -1 if + not found. Binary search. */ +extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr ); + +/* Find a FPO-table index containing the specified pointer, or -1 + if not found. Binary search. */ +extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr ); + +/* ------ Misc ------ */ + +/* Show a non-fatal debug info reading error. Use vg_panic if + terminal. 'serious' errors are always shown, not 'serious' ones + are shown only at verbosity level 2 and above. */ +extern +void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg ); + +/* Print a symbol. */ +extern void ML_(ppSym) ( Int idx, DiSym* sym ); + +/* Print a call-frame-info summary. */ +extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si ); + + +#define TRACE_SYMTAB(format, args...) \ + if (di->trace_symtab) { VG_(printf)(format, ## args); } + + +#endif /* ndef __PRIV_STORAGE_H */ + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/priv_tytypes.h.svn-base b/coregrind/m_debuginfo/.svn/text-base/priv_tytypes.h.svn-base new file mode 100644 index 0000000..880aa0e --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/priv_tytypes.h.svn-base @@ -0,0 +1,212 @@ + +/*--------------------------------------------------------------------*/ +/*--- Representation of source level types. priv_tytypes.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. 
+ + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#ifndef __PRIV_TYTYPES_H +#define __PRIV_TYTYPES_H + +typedef + enum { + Te_EMPTY=10, /* empty (contains no info) */ + Te_INDIR, /* indirection to some other TyEnt */ + Te_UNKNOWN, /* denotes a unknown type/field/whatever */ + Te_Atom, /* name & 64-bit const, iow, enumeration member */ + Te_Field, /* struct/class field defn */ + Te_Bound, /* array bounds indication, for one dimension */ + Te_TyBase, /* base type */ + Te_TyPorR, /* pointer or reference type */ + Te_TyTyDef, /* a renaming of some other type */ + Te_TyStOrUn, /* structure or union type */ + Te_TyEnum, /* an enum type */ + Te_TyArray, /* an array type */ + Te_TyFn, /* function type */ + Te_TyQual, /* qualified type */ + Te_TyVoid /* void type */ + } + TyEntTag; + +/* Fields ending in "R" are references to other TyEnts. Fields ending + in "Rs" are XArray*s of references to other TyEnts. 
*/ +typedef + struct { + UWord cuOff; + TyEntTag tag; + union { + struct { + } EMPTY; + struct { + UWord indR; + } INDIR; + struct { + } UNKNOWN; + struct { + UChar* name; /* in mallocville */ + Bool valueKnown; /* atoms w/ unknown value are possible */ + Long value; + } Atom; + struct { + UChar* name; /* in mallocville */ + UWord typeR; /* should be Te_TyXXXX */ + UChar* loc; /* location expr, in mallocville */ + UWord nLoc; /* number of bytes in .loc */ + Bool isStruct; + } Field; + struct { + Bool knownL; + Bool knownU; + Long boundL; + Long boundU; + } Bound; + struct { + UChar* name; /* in mallocville */ + Int szB; + UChar enc; /* S:signed U:unsigned F:floating C:complex float */ + } TyBase; + struct { + Int szB; + UWord typeR; + Bool isPtr; + } TyPorR; + struct { + UChar* name; /* in mallocville */ + UWord typeR; /* MAY BE D3_INVALID_CUOFF, denoting unknown */ + } TyTyDef; + struct { + UChar* name; /* in mallocville */ + UWord szB; + XArray* /* of UWord */ fieldRs; + Bool complete; + Bool isStruct; + } TyStOrUn; + struct { + UChar* name; /* in mallocville */ + Int szB; + XArray* /* of UWord */ atomRs; + } TyEnum; + struct { + UWord typeR; + XArray* /* of UWord */ boundRs; + } TyArray; + struct { + } TyFn; + struct { + UChar qual; /* C:const V:volatile */ + UWord typeR; + } TyQual; + struct { + Bool isFake; /* True == introduced by the reader */ + } TyVoid; + } Te; + } + TyEnt; + +/* Does this TyEnt denote a type, as opposed to some other kind of + thing? */ +Bool ML_(TyEnt__is_type)( TyEnt* ); + +/* Print a TyEnt, debug-style. */ +void ML_(pp_TyEnt)( TyEnt* ); + +/* Print a whole XArray of TyEnts, debug-style */ +void ML_(pp_TyEnts)( XArray* tyents, HChar* who ); + +/* Print a TyEnt, C style, chasing stuff as necessary. */ +void ML_(pp_TyEnt_C_ishly)( XArray* /* of TyEnt */ tyents, + UWord cuOff ); + +/* Generates a total ordering on TyEnts based only on their .cuOff + fields. 
*/ +Word ML_(TyEnt__cmp_by_cuOff_only) ( TyEnt* te1, TyEnt* te2 ); + +/* Generates a total ordering on TyEnts based on everything except + their .cuOff fields. */ +Word ML_(TyEnt__cmp_by_all_except_cuOff) ( TyEnt* te1, TyEnt* te2 ); + +/* Free up all directly or indirectly heap-allocated stuff attached to + this TyEnt, and set its tag to Te_EMPTY. The .cuOff field is + unchanged. */ +void ML_(TyEnt__make_EMPTY) ( TyEnt* te ); + +/* How big is this type? If .b in the returned struct is False, the + size is unknown. */ + +MaybeULong ML_(sizeOfType)( XArray* /* of TyEnt */ tyents, + UWord cuOff ); + +/* Describe where in the type 'offset' falls. Caller must + deallocate the resulting XArray. */ +XArray* /*UChar*/ ML_(describe_type)( /*OUT*/PtrdiffT* residual_offset, + XArray* /* of TyEnt */ tyents, + UWord ty_cuOff, + PtrdiffT offset ); + + +/* A fast-lookup cache for ML_(TyEnts__index_by_cuOff). Nothing + particularly surprising here; it's 2 way set associative, with some + number of ways, doesn't particularly have to be a power of 2. In + order to have a way to indicate an invalid entry, we set the second + value of the pair to NULL, and keep checking for it, since + unfortunately there's no obvious cuOff number that we could put in + the first word of the pair that could indicate an invalid entry. + + 4096 arrived at as the best value for an E6600 loading Qt-4.4.1 + Designer and all associated libraries, compiled by gcc-4.3.1, + -g -O, 64-bit, which is at least a moderately good stress test, + with the largest library being about 150MB.*/ + +#define N_TYENT_INDEX_CACHE 4096 + +typedef + struct { + struct { UWord cuOff0; TyEnt* ent0; + UWord cuOff1; TyEnt* ent1; } + ce[N_TYENT_INDEX_CACHE]; + } + TyEntIndexCache; + +void ML_(TyEntIndexCache__invalidate) ( TyEntIndexCache* cache ); + +/* 'ents' is an XArray of TyEnts, sorted by their .cuOff fields. Find + the entry which has .cuOff field as specified. Returns NULL if not + found. 
Asserts if more than one entry has the specified .cuOff + value. */ +TyEnt* ML_(TyEnts__index_by_cuOff) ( XArray* /* of TyEnt */ ents, + TyEntIndexCache* cache, + UWord cuOff_to_find ); + +#endif /* ndef __PRIV_TYTYPES_H */ + +/*--------------------------------------------------------------------*/ +/*--- end priv_tytypes.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/readdwarf.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readdwarf.c.svn-base new file mode 100644 index 0000000..3bb776e --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readdwarf.c.svn-base @@ -0,0 +1,3821 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read DWARF1/2/3 debug info. readdwarf.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. 
+*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_options.h" +#include "pub_core_xarray.h" +#include "priv_misc.h" /* dinfo_zalloc/free/strdup */ +#include "priv_d3basics.h" +#include "priv_tytypes.h" +#include "priv_storage.h" +#include "priv_readdwarf.h" /* self */ + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Read line number and CFI info from DWARF1, DWARF2 ---*/ +/*--- and to some extent DWARF3 sections. ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/*------------------------------------------------------------*/ +/*--- Expanding arrays of words, for holding file name and ---*/ +/*--- directory name arrays. ---*/ +/*------------------------------------------------------------*/ + +typedef + struct { + Word* tab; + UInt tab_size; + UInt tab_used; + } + WordArray; + +static void init_WordArray ( WordArray* wa ) +{ + wa->tab = NULL; + wa->tab_size = 0; + wa->tab_used = 0; +} + +static void free_WordArray ( WordArray* wa ) +{ + if (wa->tab) { + vg_assert(wa->tab_size > 0); + ML_(dinfo_free)(wa->tab); + } + init_WordArray(wa); +} + +static void addto_WordArray ( WordArray* wa, Word w ) +{ + UInt new_size, i; + Word* new_tab; + + if (0) VG_(printf)("<<ADD %p (new sz = %d) >>\n", + (HChar*)w, wa->tab_used+1); + + if (wa->tab_used < wa->tab_size) { + /* fine */ + } else { + /* expand array */ + if (0) VG_(printf)("EXPAND ARRAY from %d\n", wa->tab_size); + vg_assert(wa->tab_used == wa->tab_size); + vg_assert( (wa->tab_size == 0 && wa->tab == NULL) + || (wa->tab_size != 0 && wa->tab != NULL) ); + new_size = wa->tab_size == 0 ? 
8 : 2 * wa->tab_size; + new_tab = ML_(dinfo_zalloc)("di.aWA.1", new_size * sizeof(Word)); + vg_assert(new_tab != NULL); + for (i = 0; i < wa->tab_used; i++) + new_tab[i] = wa->tab[i]; + wa->tab_size = new_size; + if (wa->tab) + ML_(dinfo_free)(wa->tab); + wa->tab = new_tab; + } + + vg_assert(wa->tab_used < wa->tab_size); + vg_assert(wa->tab_size > 0); + wa->tab[wa->tab_used] = w; + wa->tab_used++; +} + +static Word index_WordArray ( /*OUT*/Bool* inRange, WordArray* wa, Int i ) +{ + vg_assert(inRange); + if (i >= 0 && i < wa->tab_used) { + *inRange = True; + return wa->tab[i]; + } else { + *inRange = False; + return 0; + } +} + + +/*------------------------------------------------------------*/ +/*--- Read DWARF2 format line number info. ---*/ +/*------------------------------------------------------------*/ + +/* Structure holding info extracted from the a .debug_line + section. */ +typedef struct +{ + ULong li_length; + UShort li_version; + ULong li_header_length; + UChar li_min_insn_length; + UChar li_default_is_stmt; + Int li_line_base; + UChar li_line_range; + UChar li_opcode_base; +} +DebugLineInfo; + +/* Structure holding additional infos found from a .debug_info + * compilation unit block */ +typedef struct +{ + /* Feel free to add more members here if you need ! */ + Char* compdir; /* Compilation directory - points to .debug_info */ + Char* name; /* Main file name - points to .debug_info */ + ULong stmt_list; /* Offset in .debug_line */ + Bool dw64; /* 64-bit Dwarf? */ +} +UnitInfo; + +/* Line number opcodes. */ +enum dwarf_line_number_ops + { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3, + DW_LNS_set_file = 4, + DW_LNS_set_column = 5, + DW_LNS_negate_stmt = 6, + DW_LNS_set_basic_block = 7, + DW_LNS_const_add_pc = 8, + DW_LNS_fixed_advance_pc = 9, + /* DWARF 3. */ + DW_LNS_set_prologue_end = 10, + DW_LNS_set_epilogue_begin = 11, + DW_LNS_set_isa = 12 + }; + +/* Line number extended opcodes. 
*/ +enum dwarf_line_number_x_ops + { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2, + DW_LNE_define_file = 3 + }; + +typedef struct +{ + /* Information for the last statement boundary. + * Needed to calculate statement lengths. */ + Addr last_address; + UInt last_file; + UInt last_line; + + Addr address; + UInt file; + UInt line; + UInt column; + Int is_stmt; + Int basic_block; + Int end_sequence; +} LineSMR; + + +/* FIXME: duplicated in readdwarf3.c */ +static +ULong read_leb128 ( UChar* data, Int* length_return, Int sign ) +{ + ULong result = 0; + UInt num_read = 0; + Int shift = 0; + UChar byte; + + vg_assert(sign == 0 || sign == 1); + + do + { + byte = * data ++; + num_read ++; + + result |= ((ULong)(byte & 0x7f)) << shift; + + shift += 7; + + } + while (byte & 0x80); + + if (length_return != NULL) + * length_return = num_read; + + if (sign && (shift < 64) && (byte & 0x40)) + result |= -(1ULL << shift); + + return result; +} + +/* Small helper functions easier to use + * value is returned and the given pointer is + * moved past end of leb128 data */ +/* FIXME: duplicated in readdwarf3.c */ +static ULong read_leb128U( UChar **data ) +{ + Int len; + ULong val = read_leb128( *data, &len, 0 ); + *data += len; + return val; +} + +/* Same for signed data */ +/* FIXME: duplicated in readdwarf3.c */ +static Long read_leb128S( UChar **data ) +{ + Int len; + ULong val = read_leb128( *data, &len, 1 ); + *data += len; + return (Long)val; +} + +/* Read what the DWARF3 spec calls an "initial length field". This + uses up either 4 or 12 bytes of the input and produces a 32-bit or + 64-bit number respectively. + + Read 32-bit value from p. If it is 0xFFFFFFFF, instead read a + 64-bit bit value from p+4. This is used in 64-bit dwarf to encode + some table lengths. + + XXX this is a hack: the endianness of the initial length field is + specified by the DWARF we're reading. 
This happens to work only + because we don't do cross-arch jitting, hence this code runs on a + platform of the same endianness as the DWARF it is reading. Same + applies for initial lengths for CIE/FDEs and probably in zillions + of other places -- to be precise, exactly the places where + binutils/dwarf.c calls byte_get(). +*/ +static ULong read_initial_length_field ( UChar* p_img, /*OUT*/Bool* is64 ) +{ + UInt w32 = *((UInt*)p_img); + if (w32 == 0xFFFFFFFF) { + *is64 = True; + return *((ULong*)(p_img+4)); + } else { + *is64 = False; + return (ULong)w32; + } +} + + +static LineSMR state_machine_regs; + +static +void reset_state_machine ( Int is_stmt ) +{ + if (0) VG_(printf)("smr.a := %p (reset)\n", NULL ); + state_machine_regs.last_address = 0; + state_machine_regs.last_file = 1; + state_machine_regs.last_line = 1; + state_machine_regs.address = 0; + state_machine_regs.file = 1; + state_machine_regs.line = 1; + state_machine_regs.column = 0; + state_machine_regs.is_stmt = is_stmt; + state_machine_regs.basic_block = 0; + state_machine_regs.end_sequence = 0; +} + +/* Look up a directory name, or return NULL if unknown. */ +static +Char* lookupDir ( Int filename_index, + WordArray* fnidx2dir, + WordArray* dirnames ) +{ + Bool inRange; + Word diridx, dirname; + + diridx = index_WordArray( &inRange, fnidx2dir, filename_index ); + if (!inRange) goto bad; + + dirname = index_WordArray( &inRange, dirnames, (Int)diridx ); + if (!inRange) goto bad; + + return (Char*)dirname; + bad: + return NULL; +} + +//////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////// + +/* Handled an extended line op starting at 'data'. Returns the number + of bytes that 'data' should be advanced by. 
*/ +static +Word process_extended_line_op( struct _DebugInfo* di, + WordArray* filenames, + WordArray* dirnames, + WordArray* fnidx2dir, + UChar* data, Int is_stmt) +{ + UChar op_code; + Int bytes_read; + UInt len; + UChar* name; + Addr adr; + + len = read_leb128 (data, & bytes_read, 0); + data += bytes_read; + + if (len == 0) { + VG_(message)(Vg_UserMsg, + "Warning: DWARF2 reader: " + "Badly formed extended line op encountered"); + return (Word)bytes_read; + } + + len += bytes_read; + op_code = * data ++; + + if (0) VG_(printf)("dwarf2: ext OPC: %d\n", op_code); + + switch (op_code) { + case DW_LNE_end_sequence: + if (0) VG_(printf)("1001: si->o %#lx, smr.a %#lx\n", + di->text_debug_bias, state_machine_regs.address ); + /* JRS: added for compliance with spec; is pointless due to + reset_state_machine below */ + state_machine_regs.end_sequence = 1; + + if (state_machine_regs.is_stmt) { + if (state_machine_regs.last_address) { + Bool inRange = False; + Char* filename + = (Char*)index_WordArray( &inRange, filenames, + state_machine_regs.last_file); + if (!inRange || !filename) + filename = "???"; + ML_(addLineInfo) ( + di, + filename, + lookupDir( state_machine_regs.last_file, + fnidx2dir, dirnames ), + di->text_debug_bias + state_machine_regs.last_address, + di->text_debug_bias + state_machine_regs.address, + state_machine_regs.last_line, 0 + ); + } + } + reset_state_machine (is_stmt); + if (di->ddump_line) + VG_(printf)(" Extended opcode %d: End of Sequence\n\n", + (Int)op_code); + break; + + case DW_LNE_set_address: + adr = *((Addr *)data); + state_machine_regs.address = adr; + if (di->ddump_line) + VG_(printf)(" Extended opcode %d: set Address to 0x%lx\n", + (Int)op_code, (Addr)adr); + break; + + case DW_LNE_define_file: + name = data; + addto_WordArray( filenames, (Word)ML_(addStr)(di,name,-1) ); + data += VG_(strlen) ((char *) data) + 1; + read_leb128 (data, & bytes_read, 0); + data += bytes_read; + read_leb128 (data, & bytes_read, 0); + data += bytes_read; + 
read_leb128 (data, & bytes_read, 0); + if (di->ddump_line) + VG_(printf)(" DWARF2-line: set_address\n"); + break; + + default: + if (di->ddump_line) + VG_(printf)("process_extended_line_op:default\n"); + break; + } + + return (Word)len; +} + +//////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////// + +/* read a .debug_line section block for a compilation unit + * + * Input: - theBlock must point to the start of the block + * for the given compilation unit + * - ui contains additional info like the compilation dir + * for this unit + * + * Output: - si debug info structures get updated + */ +static +void read_dwarf2_lineblock ( struct _DebugInfo* di, + UnitInfo* ui, + UChar* theBlock, /* IMAGE */ + Int noLargerThan ) +{ + Int i; + DebugLineInfo info; + UChar* standard_opcodes; + UChar* end_of_sequence; + Bool is64; + WordArray filenames; + WordArray dirnames; + WordArray fnidx2dir; + + UChar* external = theBlock; + UChar* data = theBlock; + + /* filenames is an array of file names harvested from the DWARF2 + info. Entry [0] is NULL and is never referred to by the state + machine. + + Similarly, dirnames is an array of directory names. Entry [0] + is also NULL and denotes "we don't know what the path is", since + that is different from "the path is the empty string". Unlike + the file name table, the state machine does refer to entry [0], + which basically means "." ("the current directory of the + compilation", whatever that means, according to the DWARF3 + spec.) + + fnidx2dir is an array of indexes into the dirnames table. + (confused yet?) filenames[] and fnidx2dir[] are indexed + together. That is, for some index i in the filename table, then + + the filename is filenames[i] + the directory is dirnames[ fnidx2dir[i] ] */ + + /* Fails due to gcc padding ... 
+ vg_assert(sizeof(DWARF2_External_LineInfo) + == sizeof(DWARF2_Internal_LineInfo)); + */ + + init_WordArray(&filenames); + init_WordArray(&dirnames); + init_WordArray(&fnidx2dir); + + /* DWARF2 starts numbering filename entries at 1, so we need to + add a dummy zeroth entry to the table. The zeroth dirnames + entry denotes 'current directory of compilation' so we might + as well make the fnidx2dir zeroth entry denote that. + */ + addto_WordArray( &filenames, (Word)NULL ); + + if (ui->compdir) + addto_WordArray( &dirnames, (Word)ML_(addStr)(di, ui->compdir, -1) ); + else + addto_WordArray( &dirnames, (Word)ML_(addStr)(di, ".", -1) ); + + addto_WordArray( &fnidx2dir, (Word)0 ); /* compilation dir */ + + info.li_length = read_initial_length_field( external, &is64 ); + external += is64 ? 12 : 4; + if (di->ddump_line) + VG_(printf)(" Length: %llu\n", + info.li_length); + + /* Check the length of the block. */ + if (info.li_length > noLargerThan) { + ML_(symerr)(di, True, + "DWARF line info appears to be corrupt " + "- the section is too small"); + goto out; + } + + /* Check its version number. */ + info.li_version = * ((UShort *)external); + external += 2; + if (di->ddump_line) + VG_(printf)(" DWARF Version: %d\n", + (Int)info.li_version); + + if (info.li_version != 2) { + ML_(symerr)(di, True, + "Only DWARF version 2 line info " + "is currently supported."); + goto out; + } + + info.li_header_length = ui->dw64 ? *((ULong*)external) + : (ULong)(*((UInt*)external)); + external += ui->dw64 ? 
8 : 4; + if (di->ddump_line) + VG_(printf)(" Prologue Length: %llu\n", + info.li_header_length); + + info.li_min_insn_length = * ((UChar *)external); + external += 1; + if (di->ddump_line) + VG_(printf)(" Minimum Instruction Length: %d\n", + (Int)info.li_min_insn_length); + + info.li_default_is_stmt = * ((UChar *)external); + external += 1; + if (di->ddump_line) + VG_(printf)(" Initial value of 'is_stmt': %d\n", + (Int)info.li_default_is_stmt); + + /* Josef Weidendorfer (20021021) writes: + + It seems to me that the Intel Fortran compiler generates bad + DWARF2 line info code: It sets "is_stmt" of the state machine in + the the line info reader to be always false. Thus, there is + never a statement boundary generated and therefore never a + instruction range/line number mapping generated for valgrind. + + Please have a look at the DWARF2 specification, Ch. 6.2 + (x86.ddj.com/ftp/manuals/tools/dwarf.pdf). Perhaps I understand + this wrong, but I don't think so. + + I just had a look at the GDB DWARF2 reader... They completely + ignore "is_stmt" when recording line info ;-) That's the reason + "objdump -S" works on files from the the intel fortran compiler. + + Therefore: */ + info.li_default_is_stmt = True; + + /* JRS: changed (UInt*) to (UChar*) */ + info.li_line_base = * ((UChar *)external); + info.li_line_base = (Int)(signed char)info.li_line_base; + external += 1; + if (di->ddump_line) + VG_(printf)(" Line Base: %d\n", + info.li_line_base); + + info.li_line_range = * ((UChar *)external); + external += 1; + if (di->ddump_line) + VG_(printf)(" Line Range: %d\n", + (Int)info.li_line_range); + + info.li_opcode_base = * ((UChar *)external); + external += 1; + if (di->ddump_line) + VG_(printf)(" Opcode Base: %d\n\n", + info.li_opcode_base); + + if (0) VG_(printf)("dwarf2: line base: %d, range %d, opc base: %d\n", + (Int)info.li_line_base, + (Int)info.li_line_range, + (Int)info.li_opcode_base); + + end_of_sequence = data + info.li_length + + (is64 ? 
12 : 4); + + reset_state_machine (info.li_default_is_stmt); + + /* Read the contents of the Opcodes table. */ + standard_opcodes = external; + if (di->ddump_line) { + VG_(printf)(" Opcodes:\n"); + for (i = 1; i < (Int)info.li_opcode_base; i++) { + VG_(printf)(" Opcode %d has %d args\n", + i, (Int)standard_opcodes[i-1]); + } + VG_(printf)("\n"); + } + + /* Read the contents of the Directory table. */ + data = standard_opcodes + info.li_opcode_base - 1; + + if (di->ddump_line) + VG_(printf)(" The Directory Table%s\n", + *data == 0 ? " is empty." : ":" ); + + while (* data != 0) { + +# define NBUF 4096 + static Char buf[NBUF]; + + if (di->ddump_line) + VG_(printf)(" %s\n", data); + + /* If data[0] is '/', then 'data' is an absolute path and we + don't mess with it. Otherwise, if we can, construct the + 'path ui->compdir' ++ "/" ++ 'data'. */ + + if (*data != '/' + /* not an absolute path */ + && ui->compdir != NULL + /* actually got something sensible for compdir */ + && VG_(strlen)(ui->compdir) + VG_(strlen)(data) + 5/*paranoia*/ < NBUF + /* it's short enough to concatenate */) + { + buf[0] = 0; + VG_(strcat)(buf, ui->compdir); + VG_(strcat)(buf, "/"); + VG_(strcat)(buf, data); + vg_assert(VG_(strlen)(buf) < NBUF); + addto_WordArray( &dirnames, (Word)ML_(addStr)(di,buf,-1) ); + if (0) VG_(printf)("rel path %s\n", buf); + } else { + /* just use 'data'. */ + addto_WordArray( &dirnames, (Word)ML_(addStr)(di,data,-1) ); + if (0) VG_(printf)("abs path %s\n", data); + } + + data += VG_(strlen)(data) + 1; + +# undef NBUF + } + + if (di->ddump_line) + VG_(printf)("\n"); + + if (*data != 0) { + ML_(symerr)(di, True, + "can't find NUL at end of DWARF2 directory table"); + goto out; + } + data ++; + + /* Read the contents of the File Name table. This produces a bunch + of file names, and for each, an index to the corresponding + directory name entry. 
*/ + if (di->ddump_line) { + VG_(printf)(" The File Name Table:\n"); + VG_(printf)(" Entry Dir Time Size Name\n"); + } + + i = 1; + while (* data != 0) { + UChar* name; + Int bytes_read, diridx; + Int uu_time, uu_size; /* unused, and a guess */ + name = data; + data += VG_(strlen) ((Char *) data) + 1; + + diridx = read_leb128 (data, & bytes_read, 0); + data += bytes_read; + uu_time = read_leb128 (data, & bytes_read, 0); + data += bytes_read; + uu_size = read_leb128 (data, & bytes_read, 0); + data += bytes_read; + + addto_WordArray( &filenames, (Word)ML_(addStr)(di,name,-1) ); + addto_WordArray( &fnidx2dir, (Word)diridx ); + if (0) VG_(printf)("file %s diridx %d\n", name, diridx ); + if (di->ddump_line) + VG_(printf)(" %d\t%d\t%d\t%d\t%s\n", + i, diridx, uu_time, uu_size, name); + i++; + } + + if (di->ddump_line) + VG_(printf)("\n"); + + if (*data != 0) { + ML_(symerr)(di, True, + "can't find NUL at end of DWARF2 file name table"); + goto out; + } + data ++; + + if (di->ddump_line) + VG_(printf)(" Line Number Statements:\n"); + + /* Now display the statements. 
*/ + + while (data < end_of_sequence) { + + UChar op_code; + Int adv; + Int bytes_read; + + op_code = * data ++; + + if (0) VG_(printf)("dwarf2: OPC: %d\n", op_code); + + if (op_code >= info.li_opcode_base) { + + Int advAddr; + op_code -= info.li_opcode_base; + adv = (op_code / info.li_line_range) + * info.li_min_insn_length; + advAddr = adv; + state_machine_regs.address += adv; + + if (0) VG_(printf)("smr.a += %#x\n", adv ); + adv = (op_code % info.li_line_range) + info.li_line_base; + if (0) VG_(printf)("1002: di->o %#lx, smr.a %#lx\n", + di->text_debug_bias, state_machine_regs.address ); + state_machine_regs.line += adv; + + if (di->ddump_line) + VG_(printf)(" Special opcode %d: advance Address by %d " + "to 0x%lx and Line by %d to %d\n", + (Int)op_code, advAddr, state_machine_regs.address, + (Int)adv, (Int)state_machine_regs.line ); + + if (state_machine_regs.is_stmt) { + /* only add a statement if there was a previous boundary */ + if (state_machine_regs.last_address) { + Bool inRange = False; + Char* filename + = (Char*)index_WordArray( &inRange, &filenames, + state_machine_regs.last_file); + if (!inRange || !filename) + filename = "???"; + ML_(addLineInfo)( + di, + filename, + lookupDir( state_machine_regs.last_file, + &fnidx2dir, &dirnames ), + di->text_debug_bias + state_machine_regs.last_address, + di->text_debug_bias + state_machine_regs.address, + state_machine_regs.last_line, + 0 + ); + } + state_machine_regs.last_address = state_machine_regs.address; + state_machine_regs.last_file = state_machine_regs.file; + state_machine_regs.last_line = state_machine_regs.line; + } + + } + + else { /* ! 
(op_code >= info.li_opcode_base) */ + + switch (op_code) { + case DW_LNS_extended_op: + data += process_extended_line_op ( + di, &filenames, &dirnames, &fnidx2dir, + data, info.li_default_is_stmt); + break; + + case DW_LNS_copy: + if (0) VG_(printf)("1002: di->o %#lx, smr.a %#lx\n", + di->text_debug_bias, state_machine_regs.address ); + if (state_machine_regs.is_stmt) { + /* only add a statement if there was a previous boundary */ + if (state_machine_regs.last_address) { + Bool inRange = False; + Char* filename + = (Char*)index_WordArray( &inRange, &filenames, + state_machine_regs.last_file ); + if (!inRange || !filename) + filename = "???"; + ML_(addLineInfo)( + di, + filename, + lookupDir( state_machine_regs.last_file, + &fnidx2dir, &dirnames ), + di->text_debug_bias + state_machine_regs.last_address, + di->text_debug_bias + state_machine_regs.address, + state_machine_regs.last_line, + 0 + ); + } + state_machine_regs.last_address = state_machine_regs.address; + state_machine_regs.last_file = state_machine_regs.file; + state_machine_regs.last_line = state_machine_regs.line; + } + state_machine_regs.basic_block = 0; /* JRS added */ + if (di->ddump_line) + VG_(printf)(" Copy\n"); + break; + + case DW_LNS_advance_pc: + adv = info.li_min_insn_length + * read_leb128 (data, & bytes_read, 0); + data += bytes_read; + state_machine_regs.address += adv; + if (0) VG_(printf)("smr.a += %#x\n", adv ); + if (di->ddump_line) + VG_(printf)(" Advance PC by %d to 0x%lx\n", + (Int)adv, state_machine_regs.address); + break; + + case DW_LNS_advance_line: + adv = read_leb128 (data, & bytes_read, 1); + data += bytes_read; + state_machine_regs.line += adv; + if (di->ddump_line) + VG_(printf)(" Advance Line by %d to %d\n", + (Int)adv, (Int)state_machine_regs.line); + break; + + case DW_LNS_set_file: + adv = read_leb128 (data, & bytes_read, 0); + data += bytes_read; + state_machine_regs.file = adv; + if (di->ddump_line) + VG_(printf)(" Set File Name to entry %d in the File Name Table\n", + 
(Int)adv); + break; + + case DW_LNS_set_column: + adv = read_leb128 (data, & bytes_read, 0); + data += bytes_read; + state_machine_regs.column = adv; + if (di->ddump_line) + VG_(printf)(" DWARF2-line: set_column\n"); + break; + + case DW_LNS_negate_stmt: + adv = state_machine_regs.is_stmt; + adv = ! adv; + state_machine_regs.is_stmt = adv; + if (di->ddump_line) + VG_(printf)(" DWARF2-line: negate_stmt\n"); + break; + + case DW_LNS_set_basic_block: + state_machine_regs.basic_block = 1; + if (di->ddump_line) + VG_(printf)(" DWARF2-line: set_basic_block\n"); + break; + + case DW_LNS_const_add_pc: + adv = (((255 - info.li_opcode_base) / info.li_line_range) + * info.li_min_insn_length); + state_machine_regs.address += adv; + if (0) VG_(printf)("smr.a += %#x\n", adv ); + if (di->ddump_line) + VG_(printf)(" Advance PC by constant %d to 0x%lx\n", + (Int)adv, (Addr)state_machine_regs.address); + break; + + case DW_LNS_fixed_advance_pc: + /* XXX: Need something to get 2 bytes */ + adv = *((UShort *)data); + data += 2; + state_machine_regs.address += adv; + if (0) VG_(printf)("smr.a += %#x\n", adv ); + if (di->ddump_line) + VG_(printf)(" DWARF2-line: fixed_advance_pc\n"); + break; + + case DW_LNS_set_prologue_end: + if (di->ddump_line) + VG_(printf)(" DWARF2-line: set_prologue_end\n"); + break; + + case DW_LNS_set_epilogue_begin: + if (di->ddump_line) + VG_(printf)(" DWARF2-line: set_epilogue_begin\n"); + break; + + case DW_LNS_set_isa: + /*adv =*/ read_leb128 (data, & bytes_read, 0); + data += bytes_read; + if (di->ddump_line) + VG_(printf)(" DWARF2-line: set_isa\n"); + break; + + default: { + Int j; + for (j = standard_opcodes[op_code - 1]; j > 0 ; --j) { + read_leb128 (data, &bytes_read, 0); + data += bytes_read; + } + if (di->ddump_line) + VG_(printf)(" Unknown opcode %d\n", (Int)op_code); + break; + } + } /* switch (op_code) */ + + } /* if (op_code >= info.li_opcode_base) */ + + } /* while (data < end_of_sequence) */ + + if (di->ddump_line) + VG_(printf)("\n"); + + out: 
+ free_WordArray(&filenames); + free_WordArray(&dirnames); + free_WordArray(&fnidx2dir); +} + +//////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////// + +/* Return abbrev for given code + * Returned pointer points to the tag + * */ +static UChar* lookup_abbrev( UChar* p, UInt acode ) +{ + UInt code; + UInt name; + for( ; ; ) { + code = read_leb128U( &p ); + if ( code == acode ) + return p; + read_leb128U( &p ); /* skip tag */ + p++; /* skip has_children flag */ + do { + name = read_leb128U( &p ); /* name */ + read_leb128U( &p ); /* form */ + } + while( name != 0 ); /* until name == form == 0 */ + } + return NULL; +} + +/* Read general information for a particular compile unit block in + * the .debug_info section. + * + * Input: - unitblock is the start of a compilation + * unit block in .debuginfo section + * - debugabbrev is start of .debug_abbrev section + * - debugstr is start of .debug_str section + * + * Output: Fill members of ui pertaining to the compilation unit: + * - ui->name is the name of the compilation unit + * - ui->compdir is the compilation unit directory + * - ui->stmt_list is the offset in .debug_line section + * for the dbginfos of this compilation unit + * + * Note : the output strings are not allocated and point + * directly to the memory-mapped section. + */ +static +void read_unitinfo_dwarf2( /*OUT*/UnitInfo* ui, + UChar* unitblock_img, + UChar* debugabbrev_img, + UChar* debugstr_img ) +{ + UInt acode, abcode; + ULong atoffs, blklen; + Int level; + UShort ver; + + UChar addr_size; + UChar* p = unitblock_img; + UChar* end_img; + UChar* abbrev_img; + + VG_(memset)( ui, 0, sizeof( UnitInfo ) ); + ui->stmt_list = -1LL; + + /* Read the compilation unit header in .debug_info section - See p 70 */ + + /* This block length */ + blklen = read_initial_length_field( p, &ui->dw64 ); + p += ui->dw64 ? 
12 : 4; + + /* version should be 2 */ + ver = *((UShort*)p); + p += 2; + + /* get offset in abbrev */ + atoffs = ui->dw64 ? *((ULong*)p) : (ULong)(*((UInt*)p)); + p += ui->dw64 ? 8 : 4; + + /* Address size */ + addr_size = *p; + p += 1; + + end_img = unitblock_img + + blklen + (ui->dw64 ? 12 : 4); /* End of this block */ + level = 0; /* Level in the abbrev tree */ + abbrev_img = debugabbrev_img + + atoffs; /* Abbreviation data for this block */ + + /* Read the compilation unit entries */ + while ( p < end_img ) { + Bool has_child; + UInt tag; + + acode = read_leb128U( &p ); /* abbreviation code */ + if ( acode == 0 ) { + /* NULL entry used for padding - or last child for a sequence + - see para 7.5.3 */ + level--; + continue; + } + + /* Read abbreviation header */ + abcode = read_leb128U( &abbrev_img ); /* abbreviation code */ + if ( acode != abcode ) { + /* We are in in children list, and must rewind to a + * previously declared abbrev code. This code works but is + * not triggered since we shortcut the parsing once we have + * read the compile_unit block. This should only occur when + * level > 0 */ + abbrev_img = lookup_abbrev( debugabbrev_img + atoffs, acode ); + } + + tag = read_leb128U( &abbrev_img ); + has_child = *(abbrev_img++) == 1; /* DW_CHILDREN_yes */ + + if ( has_child ) + level++; + + /* And loop on entries */ + for ( ; ; ) { + /* Read entry definition */ + UInt name, form; + ULong cval = -1LL; /* Constant value read */ + Char *sval = NULL; /* String value read */ + name = read_leb128U( &abbrev_img ); + form = read_leb128U( &abbrev_img ); + if ( name == 0 ) + break; + + /* Read data */ + /* Attributes encoding explained p 71 */ + if ( form == 0x16 /* FORM_indirect */ ) + form = read_leb128U( &p ); + /* Decode form. For most kinds, Just skip the amount of data since + we don't use it for now */ + /* JRS 9 Feb 06: This now handles 64-bit DWARF too. In + 64-bit DWARF, lineptr (and loclistptr,macptr,rangelistptr + classes) use FORM_data8, not FORM_data4. 
Also, + FORM_ref_addr and FORM_strp are 64-bit values, not 32-bit + values. */ + switch( form ) { + /* Those cases extract the data properly */ + case 0x05: /* FORM_data2 */ cval = *((UShort*)p); p +=2; break; + case 0x06: /* FORM_data4 */ cval = *((UInt*)p);p +=4; break; + case 0x0e: /* FORM_strp */ /* pointer in .debug_str */ + /* 2006-01-01: only generate a value if + debugstr is non-NULL (which means that a + debug_str section was found) */ + if (debugstr_img && !ui->dw64) + sval = debugstr_img + *((UInt*)p); + if (debugstr_img && ui->dw64) + sval = debugstr_img + *((ULong*)p); + p += ui->dw64 ? 8 : 4; + break; + case 0x08: /* FORM_string */ sval = (Char*)p; + p += VG_(strlen)((Char*)p) + 1; break; + case 0x0b: /* FORM_data1 */ cval = *p; p++; break; + + /* TODO : Following ones just skip data - implement if you need */ + case 0x01: /* FORM_addr */ p += addr_size; break; + case 0x03: /* FORM_block2 */ p += *((UShort*)p) + 2; break; + case 0x04: /* FORM_block4 */ p += *((UInt*)p) + 4; break; + case 0x07: /* FORM_data8 */ if (ui->dw64) cval = *((ULong*)p); + p += 8; break; + /* perhaps should assign unconditionally to cval? */ + case 0x09: /* FORM_block */ p += read_leb128U( &p ); break; + case 0x0a: /* FORM_block1 */ p += *p + 1; break; + case 0x0c: /* FORM_flag */ p++; break; + case 0x0d: /* FORM_sdata */ read_leb128S( &p ); break; + case 0x0f: /* FORM_udata */ read_leb128U( &p ); break; + case 0x10: /* FORM_ref_addr */ p += ui->dw64 ? 
8 : 4; break; + case 0x11: /* FORM_ref1 */ p++; break; + case 0x12: /* FORM_ref2 */ p += 2; break; + case 0x13: /* FORM_ref4 */ p += 4; break; + case 0x14: /* FORM_ref8 */ p += 8; break; + case 0x15: /* FORM_ref_udata */ read_leb128U( &p ); break; + + default: + VG_(printf)( "### unhandled dwarf2 abbrev form code 0x%x\n", form ); + break; + } + + /* Now store the members we need in the UnitInfo structure */ + if ( tag == 0x0011 /*TAG_compile_unit*/ ) { + if ( name == 0x03 ) ui->name = sval; /* DW_AT_name */ + else if ( name == 0x1b ) ui->compdir = sval; /* DW_AT_compdir */ + else if ( name == 0x10 ) ui->stmt_list = cval; /* DW_AT_stmt_list */ + } + } + /* Shortcut the parsing once we have read the compile_unit block + * That's enough info for us, and we are not gdb ! */ + if ( tag == 0x0011 /*TAG_compile_unit*/ ) + break; + } /* Loop on each sub block */ + + /* This test would be valid if we were not shortcutting the parsing + if (level != 0) + VG_(printf)( "#### Exiting debuginfo block at level %d !!!\n", level ); + */ +} + + +//////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////// + +/* Collect the debug info from DWARF3 debugging sections + * of a given module. 
+ * + * Inputs: given .debug_xxx sections + * Output: update di to contain all the DWARF3 debug infos + */ +void ML_(read_debuginfo_dwarf3) + ( struct _DebugInfo* di, + UChar* debug_info_img, Word debug_info_sz, /* .debug_info */ + UChar* debug_abbv_img, Word debug_abbv_sz, /* .debug_abbrev */ + UChar* debug_line_img, Word debug_line_sz, /* .debug_line */ + UChar* debug_str_img, Word debug_str_sz ) /* .debug_str */ +{ + UnitInfo ui; + UShort ver; + UChar* block_img; + UChar* end1_img; + ULong blklen; + Bool blklen_is_64; + Int blklen_len; + + end1_img = debug_info_img + debug_info_sz; + blklen_len = 0; + + /* Make sure we at least have a header for the first block */ + if (debug_info_sz < 4) { + ML_(symerr)( di, True, + "Last block truncated in .debug_info; ignoring" ); + return; + } + + /* Iterate on all the blocks we find in .debug_info */ + for ( block_img = debug_info_img; + block_img < end1_img - 4; + block_img += blklen + blklen_len ) { + + /* Read the compilation unit header in .debug_info section - See + p 70 */ + /* This block length */ + blklen = read_initial_length_field( block_img, &blklen_is_64 ); + blklen_len = blklen_is_64 ? 
12 : 4; + if ( block_img + blklen + blklen_len > end1_img ) { + ML_(symerr)( di, True, + "Last block truncated in .debug_info; ignoring" ); + return; + } + + /* version should be 2 */ + ver = *((UShort*)( block_img + blklen_len )); + if ( ver != 2 ) { + ML_(symerr)( di, True, + "Ignoring non-dwarf2 block in .debug_info" ); + continue; + } + + /* Fill ui with offset in .debug_line and compdir */ + if (0) + VG_(printf)( "Reading UnitInfo at 0x%lx.....\n", + block_img - debug_info_img + 0UL ); + read_unitinfo_dwarf2( &ui, block_img, + debug_abbv_img, debug_str_img ); + if (0) + VG_(printf)( " => LINES=0x%llx NAME=%s DIR=%s\n", + ui.stmt_list, ui.name, ui.compdir ); + + /* Ignore blocks with no .debug_line associated block */ + if ( ui.stmt_list == -1LL ) + continue; + + if (0) + VG_(printf)("debug_line_sz %ld, ui.stmt_list %lld %s\n", + debug_line_sz, ui.stmt_list, ui.name ); + /* Read the .debug_line block for this compile unit */ + read_dwarf2_lineblock( + di, &ui, debug_line_img + ui.stmt_list, + debug_line_sz - ui.stmt_list ); + } +} + + +//////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////// + +/*------------------------------------------------------------*/ +/*--- Read DWARF1 format line number info. ---*/ +/*------------------------------------------------------------*/ + +/* DWARF1 appears to be redundant, but nevertheless the Lahey Fortran + compiler generates it. +*/ + +/* The following three enums (dwarf_tag, dwarf_form, dwarf_attribute) + are taken from the file include/elf/dwarf.h in the GNU gdb-6.0 + sources, which are Copyright 1992, 1993, 1995, 1999 Free Software + Foundation, Inc and naturally licensed under the GNU General Public + License version 2 or later. +*/ + +/* Tag names and codes. 
*/

/* DWARF1 DIE tags.  The DWARF1 reader in this file only acts on
   TAG_compile_unit; the remaining values are kept as a reference
   copy of the gdb table. */
enum dwarf_tag {
   TAG_padding                = 0x0000,
   TAG_array_type             = 0x0001,
   TAG_class_type             = 0x0002,
   TAG_entry_point            = 0x0003,
   TAG_enumeration_type       = 0x0004,
   TAG_formal_parameter       = 0x0005,
   TAG_global_subroutine      = 0x0006,
   TAG_global_variable        = 0x0007,
   /* 0x0008 -- reserved */
   /* 0x0009 -- reserved */
   TAG_label                  = 0x000a,
   TAG_lexical_block          = 0x000b,
   TAG_local_variable         = 0x000c,
   TAG_member                 = 0x000d,
   /* 0x000e -- reserved */
   TAG_pointer_type           = 0x000f,
   TAG_reference_type         = 0x0010,
   TAG_compile_unit           = 0x0011,
   TAG_string_type            = 0x0012,
   TAG_structure_type         = 0x0013,
   TAG_subroutine             = 0x0014,
   TAG_subroutine_type        = 0x0015,
   TAG_typedef                = 0x0016,
   TAG_union_type             = 0x0017,
   TAG_unspecified_parameters = 0x0018,
   TAG_variant                = 0x0019,
   TAG_common_block           = 0x001a,
   TAG_common_inclusion       = 0x001b,
   TAG_inheritance            = 0x001c,
   TAG_inlined_subroutine     = 0x001d,
   TAG_module                 = 0x001e,
   TAG_ptr_to_member_type     = 0x001f,
   TAG_set_type               = 0x0020,
   TAG_subrange_type          = 0x0021,
   TAG_with_stmt              = 0x0022,

   /* GNU extensions */

   TAG_format_label           = 0x8000, /* for FORTRAN 77 and Fortran 90 */
   TAG_namelist               = 0x8001, /* For Fortran 90 */
   TAG_function_template      = 0x8002, /* for C++ */
   TAG_class_template         = 0x8003  /* for C++ */
};

/* Form names and codes.  Each dwarf_attribute value below is built
   by OR-ing one of these form codes into an attribute number. */

enum dwarf_form {
   FORM_ADDR   = 0x1,
   FORM_REF    = 0x2,
   FORM_BLOCK2 = 0x3,
   FORM_BLOCK4 = 0x4,
   FORM_DATA2  = 0x5,
   FORM_DATA4  = 0x6,
   FORM_DATA8  = 0x7,
   FORM_STRING = 0x8
};

/* Attribute names and codes.
*/ + +enum dwarf_attribute { + AT_sibling = (0x0010|FORM_REF), + AT_location = (0x0020|FORM_BLOCK2), + AT_name = (0x0030|FORM_STRING), + AT_fund_type = (0x0050|FORM_DATA2), + AT_mod_fund_type = (0x0060|FORM_BLOCK2), + AT_user_def_type = (0x0070|FORM_REF), + AT_mod_u_d_type = (0x0080|FORM_BLOCK2), + AT_ordering = (0x0090|FORM_DATA2), + AT_subscr_data = (0x00a0|FORM_BLOCK2), + AT_byte_size = (0x00b0|FORM_DATA4), + AT_bit_offset = (0x00c0|FORM_DATA2), + AT_bit_size = (0x00d0|FORM_DATA4), + /* (0x00e0|FORM_xxxx) -- reserved */ + AT_element_list = (0x00f0|FORM_BLOCK4), + AT_stmt_list = (0x0100|FORM_DATA4), + AT_low_pc = (0x0110|FORM_ADDR), + AT_high_pc = (0x0120|FORM_ADDR), + AT_language = (0x0130|FORM_DATA4), + AT_member = (0x0140|FORM_REF), + AT_discr = (0x0150|FORM_REF), + AT_discr_value = (0x0160|FORM_BLOCK2), + /* (0x0170|FORM_xxxx) -- reserved */ + /* (0x0180|FORM_xxxx) -- reserved */ + AT_string_length = (0x0190|FORM_BLOCK2), + AT_common_reference = (0x01a0|FORM_REF), + AT_comp_dir = (0x01b0|FORM_STRING), + AT_const_value_string = (0x01c0|FORM_STRING), + AT_const_value_data2 = (0x01c0|FORM_DATA2), + AT_const_value_data4 = (0x01c0|FORM_DATA4), + AT_const_value_data8 = (0x01c0|FORM_DATA8), + AT_const_value_block2 = (0x01c0|FORM_BLOCK2), + AT_const_value_block4 = (0x01c0|FORM_BLOCK4), + AT_containing_type = (0x01d0|FORM_REF), + AT_default_value_addr = (0x01e0|FORM_ADDR), + AT_default_value_data2 = (0x01e0|FORM_DATA2), + AT_default_value_data4 = (0x01e0|FORM_DATA4), + AT_default_value_data8 = (0x01e0|FORM_DATA8), + AT_default_value_string = (0x01e0|FORM_STRING), + AT_friends = (0x01f0|FORM_BLOCK2), + AT_inline = (0x0200|FORM_STRING), + AT_is_optional = (0x0210|FORM_STRING), + AT_lower_bound_ref = (0x0220|FORM_REF), + AT_lower_bound_data2 = (0x0220|FORM_DATA2), + AT_lower_bound_data4 = (0x0220|FORM_DATA4), + AT_lower_bound_data8 = (0x0220|FORM_DATA8), + AT_private = (0x0240|FORM_STRING), + AT_producer = (0x0250|FORM_STRING), + AT_program = (0x0230|FORM_STRING), + 
AT_protected = (0x0260|FORM_STRING), + AT_prototyped = (0x0270|FORM_STRING), + AT_public = (0x0280|FORM_STRING), + AT_pure_virtual = (0x0290|FORM_STRING), + AT_return_addr = (0x02a0|FORM_BLOCK2), + AT_abstract_origin = (0x02b0|FORM_REF), + AT_start_scope = (0x02c0|FORM_DATA4), + AT_stride_size = (0x02e0|FORM_DATA4), + AT_upper_bound_ref = (0x02f0|FORM_REF), + AT_upper_bound_data2 = (0x02f0|FORM_DATA2), + AT_upper_bound_data4 = (0x02f0|FORM_DATA4), + AT_upper_bound_data8 = (0x02f0|FORM_DATA8), + AT_virtual = (0x0300|FORM_STRING), + + /* GNU extensions. */ + + AT_sf_names = (0x8000|FORM_DATA4), + AT_src_info = (0x8010|FORM_DATA4), + AT_mac_info = (0x8020|FORM_DATA4), + AT_src_coords = (0x8030|FORM_DATA4), + AT_body_begin = (0x8040|FORM_ADDR), + AT_body_end = (0x8050|FORM_ADDR) +}; + +/* end of enums taken from gdb-6.0 sources */ + +void ML_(read_debuginfo_dwarf1) ( + struct _DebugInfo* di, + UChar* dwarf1d, Int dwarf1d_sz, + UChar* dwarf1l, Int dwarf1l_sz ) +{ + UInt stmt_list; + Bool stmt_list_found; + Int die_offset, die_szb, at_offset; + UShort die_kind, at_kind; + UChar* at_base; + UChar* src_filename; + + if (0) + VG_(printf)("read_debuginfo_dwarf1 ( %p, %d, %p, %d )\n", + dwarf1d, dwarf1d_sz, dwarf1l, dwarf1l_sz ); + + /* This loop scans the DIEs. */ + die_offset = 0; + while (True) { + if (die_offset >= dwarf1d_sz) break; + + die_szb = *(Int*)(dwarf1d + die_offset); + die_kind = *(UShort*)(dwarf1d + die_offset + 4); + + /* We're only interested in compile_unit DIEs; ignore others. */ + if (die_kind != TAG_compile_unit) { + die_offset += die_szb; + continue; + } + + if (0) + VG_(printf)("compile-unit DIE: offset %d, tag 0x%x, size %d\n", + die_offset, (Int)die_kind, die_szb ); + + /* We've got a compile_unit DIE starting at (dwarf1d + + die_offset+6). Try and find the AT_name and AT_stmt_list + attributes. Then, finally, we can read the line number info + for this source file. */ + + /* The next 3 are set as we find the relevant attrs. 
*/ + src_filename = NULL; + stmt_list_found = False; + stmt_list = 0; + + /* This loop scans the Attrs inside compile_unit DIEs. */ + at_base = dwarf1d + die_offset + 6; + at_offset = 0; + while (True) { + if (at_offset >= die_szb-6) break; + + at_kind = *(UShort*)(at_base + at_offset); + if (0) VG_(printf)("atoffset %d, attag 0x%x\n", + at_offset, (Int)at_kind ); + at_offset += 2; /* step over the attribute itself */ + /* We have to examine the attribute to figure out its + length. */ + switch (at_kind) { + case AT_stmt_list: + case AT_language: + case AT_sibling: + if (at_kind == AT_stmt_list) { + stmt_list_found = True; + stmt_list = *(Int*)(at_base+at_offset); + } + at_offset += 4; break; + case AT_high_pc: + case AT_low_pc: + at_offset += sizeof(void*); break; + case AT_name: + case AT_producer: + case AT_comp_dir: + /* Zero terminated string, step over it. */ + if (at_kind == AT_name) + src_filename = at_base + at_offset; + while (at_offset < die_szb-6 && at_base[at_offset] != 0) + at_offset++; + at_offset++; + break; + default: + VG_(printf)("Unhandled DWARF-1 attribute 0x%x\n", + (Int)at_kind ); + VG_(core_panic)("Unhandled DWARF-1 attribute"); + } /* switch (at_kind) */ + } /* looping over attributes */ + + /* So, did we find the required stuff for a line number table in + this DIE? If yes, read it. */ + if (stmt_list_found /* there is a line number table */ + && src_filename != NULL /* we know the source filename */ + ) { + /* Table starts: + Length: + 4 bytes, includes the entire table + Base address: + unclear (4? 8?), assuming native pointer size here. 
+ Then a sequence of triples + (source line number -- 32 bits + source line column -- 16 bits + address delta -- 32 bits) + */ + Addr base; + Int len; + Char* curr_filenm; + UChar* ptr; + UInt prev_line, prev_delta; + + curr_filenm = ML_(addStr) ( di, src_filename, -1 ); + prev_line = prev_delta = 0; + + ptr = dwarf1l + stmt_list; + len = *(Int*)ptr; ptr += sizeof(Int); + base = (Addr)(*(void**)ptr); ptr += sizeof(void*); + len -= (sizeof(Int) + sizeof(void*)); + while (len > 0) { + UInt line; + UShort col; + UInt delta; + line = *(UInt*)ptr; ptr += sizeof(UInt); + col = *(UShort*)ptr; ptr += sizeof(UShort); + delta = *(UShort*)ptr; ptr += sizeof(UInt); + if (0) VG_(printf)("line %d, col %d, delta %d\n", + line, (Int)col, delta ); + len -= (sizeof(UInt) + sizeof(UShort) + sizeof(UInt)); + + if (delta > 0 && prev_line > 0) { + if (0) VG_(printf) (" %d %d-%d\n", + prev_line, prev_delta, delta-1); + ML_(addLineInfo) ( di, curr_filenm, NULL, + base + prev_delta, base + delta, + prev_line, 0 ); + } + prev_line = line; + prev_delta = delta; + } + } + + /* Move on the the next DIE. */ + die_offset += die_szb; + + } /* Looping over DIEs */ + +} + + +/*------------------------------------------------------------*/ +/*--- Read call-frame info from an .eh_frame section ---*/ +/*------------------------------------------------------------*/ + +/* Sources of info: + + The DWARF3 spec, available from http://www.dwarfstd.org/Download.php + + This describes how to read CFA data from .debug_frame sections. + So as to maximise everybody's annoyance and confusion, .eh_frame + sections are almost the same as .debug_frame sections, but differ + in a few subtle and ill documented but important aspects. + + Generic ELF Specification, sections 7.5 (DWARF Extensions) and 7.6 + (Exception Frames), available from + + http://www.linux-foundation.org/spec/book/ELF-generic/ELF-generic.html + + This really does describe .eh_frame, at least the aspects that + differ from standard DWARF3. 
It's better than guessing, and + (marginally) more fun than reading the gdb source code. +*/ + +/* Useful info .. + + In general: + gdb-6.3/gdb/dwarf2-frame.c + + gdb-6.3/gdb/i386-tdep.c: + + DWARF2/GCC uses the stack address *before* the function call as a + frame's CFA. [jrs: I presume this means %esp before the call as + the CFA]. + + JRS: on amd64, the dwarf register numbering is, as per + gdb-6.3/gdb/amd64-tdep.c and also amd64-abi-0.98.pdf: + + 0 1 2 3 4 5 6 7 + RAX RDX RCX RBX RSI RDI RBP RSP + + 8 ... 15 + R8 ... R15 + + 16 is the return address (RIP) + "The table defines Return Address to have a register number, + even though the address is stored in 0(%rsp) and not in a + physical register." + + 17 ... 24 + XMM0 ... XMM7 + + 25 ... 32 + XMM8 ... XMM15 + + 33 ... 40 + ST0 ... ST7 + + 41 ... 48 + MM0 ... MM7 + + 49 RFLAGS + 50,51,52,53,54,55 ES,CS,SS,DS,FS,GS + 58 FS.BASE (what's that?) + 59 GS.BASE (what's that?) + 62 TR (task register) + 63 LDTR (LDT register) + 64 MXCSR + 65 FCW (x87 control word) + 66 FSW (x87 status word) + + On x86 I cannot find any documentation. It _appears_ to be the + actual instruction encoding, viz: + + 0 1 2 3 4 5 6 7 + EAX ECX EDX EBX ESP EBP ESI EDI + + 8 is the return address (EIP) */ + + +/* Comments re DW_CFA_set_loc, 16 Nov 06. + + JRS: + Someone recently sent me a libcrypto.so.0.9.8 as distributed with + Ubuntu of some flavour, compiled with gcc 4.1.2 on amd64.
It + causes V's CF reader to complain a lot: + + >> --19976-- DWARF2 CFI reader: unhandled CFI instruction 0:24 + >> --19976-- DWARF2 CFI reader: unhandled CFI instruction 0:24 + >> --19976-- DWARF2 CFI reader: unhandled CFI instruction 0:24 + >> --19976-- DWARF2 CFI reader: unhandled CFI instruction 0:24 + >> --19976-- DWARF2 CFI reader: unhandled CFI instruction 0:48 + >> --19976-- DWARF2 CFI reader: unhandled CFI instruction 0:24 + + After chasing this around a bit it seems that the CF bytecode + parser lost sync at a DW_CFA_set_loc, which has a single argument + denoting an address. + + As it stands that address is extracted by read_Addr(). On amd64 + that just fetches 8 bytes regardless of anything else. + + read_encoded_Addr() is more sophisticated. This appears to take + into account some kind of encoding flag. When I replace the uses + of read_Addr by read_encoded_Addr for DW_CFA_set_loc, the + complaints go away, there is no loss of sync, and the parsed CF + instructions are the same as shown by readelf --debug-dump=frames. + + So it seems a plausible fix. The problem is I looked in the DWARF3 + spec and completely failed to figure out whether or not the arg to + DW_CFA_set_loc is supposed to be encoded in a way suitable for + read_encoded_Addr, nor for that matter any description of what it + is that read_encoded_Addr is really decoding. + + TomH: + The problem is that the encoding is not standard - the eh_frame + section uses the same encoding as the dwarf_frame section except + for a few small changes, and this is one of them. So this is not + something the DWARF standard covers. + + There is an augmentation string to indicate what is going on though + so that programs can recognise it. + + What we are doing seems to match what gdb 6.5 and libdwarf 20060614 + do though. I'm not sure about readelf though. 
+ + (later): Well dwarfdump barfs on it: + + dwarfdump ERROR: dwarf_get_fde_info_for_reg: + DW_DLE_DF_FRAME_DECODING_ERROR(193) (193) + + I've looked at binutils as well now, and the code in readelf agrees + with your patch - ie it treats set_loc as having an encoded address + if there is a zR augmentation indicating an encoding. + + Quite why gdb and libdwarf don't understand this is an interesting + question... + + Final outcome: all uses of read_Addr were replaced by + read_encoded_Addr. A new type AddressDecodingInfo was added to + make it relatively clean to plumb through the extra info needed by + read_encoded_Addr. +*/ + +/* More badness re address encoding, 12 Jan 07. + + Most gcc provided CIEs have a "zR" augmentation, which means they + supply their own address encoding, and that works fine. However, + some icc9 supplied CIEs have no augmentation, which means they use + the default_Addr_encoding(). That says to use a machine-word sized + value, literally unmodified. + + Since .so's are, in general, relocated when loaded, having absolute + addresses in the CFI data makes no sense when read_encoded_Addr is + used to find the initial location for a FDE. The resulting saga: + + TomH: + > I'm chasing a stack backtrace failure for an amd64 .so which was + > created I believe by icc 9.1. After a while I wound up looking at + > this: (readdwarf.c) + > + > 5083 tom static UChar default_Addr_encoding ( void ) + > 3584 tom { + > 3584 tom switch (sizeof(Addr)) { + > 3584 tom case 4: return DW_EH_PE_udata4; + > 3584 tom case 8: return DW_EH_PE_udata8; + > 3584 tom default: vg_assert(0); + > 3584 tom } + > 3584 tom } + > + > If a CIE does not have an "augmentation string" (typically "zR") then + > addresses are decoded as described by default_Addr_encoding. If there + > is an 'R' in the augmentation string then the encoding to use + > is specified by the CIE itself, which works fine with GCC compiled code + > since that always appears to specify zR. + + Correct. 
+ + > Problem is this .so has no augmentation string and so uses the + > default encoding, viz DW_EH_PE_udata8. That appears to mean + > "read a 64 bit number" and use that as-is (for the starting value + > of the program counter when running the CFA program). + + Strictly speaking the default is DW_EH_PE_absptr, but that amounts + to either udata4 or udata8 depending on the platform's pointer size + which is a shortcut I used. + + > For this .so that gives nonsense (very small) PCs which are later + > rejected by the sanity check which ensures PC ranges fall inside + > the mapped text segment. It seems like the .so expects to have the + > start VMA of the text segment added on. This would correspond to + > + > static UChar default_Addr_encoding ( void ) + > { + > switch (sizeof(Addr)) { + > case 4: return DW_EH_PE_textrel + DW_EH_PE_udata4; + > case 8: return DW_EH_PE_textrel + DW_EH_PE_udata8; + > default: vg_assert(0); + > } + > } + + The problem you're seeing is that you have absolute pointers inside + a shared library, which obviously makes little sense on the face of + things as how would the linker know where the library will be + loaded? + + The answer of course is that it doesn't, so if it puts absolute + pointers in the frame unwind data it has to include relocations for + them, and I'm betting that if you look at the relocations in the + library you will find there are some for that data. + + That is fine of course when ld.so maps the library - it will + relocate the eh_frame data as it maps it (or prelinking will + already have done so) and when the g++ exception code kicks in and + unwinds the stack it will see relocated data. + + We of course are mapping the section from the ELF file ourselves + and are not applying the relocations, hence the problem you are + seeing.
+ + Strictly speaking we should apply the relocations but the cheap + solution is essentially to do what you've done - strictly speaking + you should adjust by the difference between the address the library + was linked for and the address it has been loaded at, but a shared + library will normally be linked for address zero I believe. It's + possible that prelinking might change that though? + + JRS: + That all syncs with what I am seeing. + + So what I am inclined to do is: + + - Leave default_Addr_encoding as it is + + - Change read_encoded_Addr's handling of "case DW_EH_PE_absptr" so + it sets base to, as you say, the difference between the address + the library was linked for and the address it has been loaded at + (== the SegInfo's text_bias) + + Does that sound sane? I think it should even handle the prelinked + case. + + (JRS, later) + + Hmm. Plausible as it sounds, it doesn't work. It now produces + bogus backtraces for locations inside the (statically linked) + memcheck executable. + + Besides, there are a couple of other places where read_encoded_Addr + is used -- one of which is used to establish the length of the + address range covered by the current FDE: + + fde_arange = read_encoded_Addr(&nbytes, &adi, data); + + and it doesn't seem to make any sense for read_encoded_Addr to add + on the text segment bias in that context. The DWARF3 spec says + that both the initial_location and address_range (length) fields + are encoded the same way ("target address"), so it is unclear at + what stage in the process it would be appropriate to relocate the + former but not the latter. + + One unprincipled kludge that does work is the following: just + before handing one of the address range fragments off to + ML_(addDiCfSI) for permanent storage, check its start address. If + that is very low (less than 2 M), and is far below the mapped text + segment, and adding the text bias would move the fragment entirely + inside the mapped text segment, then do so. 
A kind of kludged + last-minute relocation, if you like. + + 12 Jan 07: committing said kludge (see kludge_then_addDiCfSI). If + the situation clarifies, it can easily enough be backed out and + replaced by a better fix. +*/ + +/* --------------- Decls --------------- */ + +#if defined(VGP_x86_linux) +# define FP_REG 5 +# define SP_REG 4 +# define RA_REG_DEFAULT 8 +#elif defined(VGP_amd64_linux) +# define FP_REG 6 +# define SP_REG 7 +# define RA_REG_DEFAULT 16 +#elif defined(VGP_ppc32_linux) +# define FP_REG 1 +# define SP_REG 1 +# define RA_REG_DEFAULT 8 // CAB: What's a good default ? +#elif defined(VGP_ppc64_linux) +# define FP_REG 1 +# define SP_REG 1 +# define RA_REG_DEFAULT 8 // CAB: What's a good default ? +#else +# error "Unknown platform" +#endif + +/* the number of regs we are prepared to unwind */ +#define N_CFI_REGS 20 + +/* Instructions for the automaton */ +enum dwarf_cfa_primary_ops + { + DW_CFA_use_secondary = 0, + DW_CFA_advance_loc = 1, + DW_CFA_offset = 2, + DW_CFA_restore = 3 + }; + +enum dwarf_cfa_secondary_ops + { + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, /* DWARF3 only */ + DW_CFA_expression = 0x10, /* DWARF3 only */ + DW_CFA_offset_extended_sf = 0x11, /* DWARF3 only */ + DW_CFA_def_cfa_sf = 0x12, /* DWARF3 only */ + DW_CFA_def_cfa_offset_sf = 0x13, /* DWARF3 only */ + DW_CFA_val_offset = 0x14, /* DWARF3 only */ + DW_CFA_val_offset_sf = 0x15, /* DWARF3 only */ + DW_CFA_val_expression = 0x16, /* DWARF3 only */ + DW_CFA_lo_user = 0x1c, + DW_CFA_GNU_window_save = 0x2d, /* GNU extension */ + DW_CFA_GNU_args_size = 0x2e, /* 
GNU extension */ + DW_CFA_GNU_negative_offset_extended = 0x2f, /* GNU extension */ + DW_CFA_hi_user = 0x3f + }; + +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_omit 0xff + +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C +#define DW_EH_PE_signed 0x08 + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 + +#define DW_EH_PE_indirect 0x80 + + +/* RegRule and UnwindContext are used temporarily to do the unwinding. + The result is then summarised into a sequence of CfiSIs, if + possible. UnwindContext effectively holds the state of the + abstract machine whilst it is running. + + The CFA can either be a signed offset from a register, + or an expression: + + CFA = cfa_reg + cfa_off when UnwindContext.cfa_is_regoff==True + | [[ cfa_expr_id ]] + + When .cfa_is_regoff == True, cfa_expr_id must be zero + When .cfa_is_regoff == False, cfa_reg must be zero + and cfa_off must be zero + + RegRule describes, for each register, how to get its + value in the previous frame, where 'cfa' denotes the cfa + for the frame as a whole: + + RegRule = RR_Undef -- undefined + | RR_Same -- same as in previous frame + | RR_CFAOff arg -- is at * ( cfa + arg ) + | RR_CFAValOff arg -- is ( cfa + arg ) + | RR_Reg arg -- is in register 'arg' + | RR_Expr arg -- is at * [[ arg ]] + | RR_ValExpr arg -- is [[ arg ]] + | RR_Arch -- dunno + + Note that RR_Expr is redundant since the same can be represented + using RR_ValExpr with an explicit dereference (CfiExpr_Deref) at + the outermost level. + + All expressions are stored in exprs in the containing + UnwindContext. 
Since the UnwindContext gets reinitialised for each + new FDE, summarise_context needs to copy out any expressions it + wants to keep into the cfsi_exprs field of the containing SegInfo. +*/ +typedef + struct { + enum { RR_Undef, RR_Same, RR_CFAOff, RR_CFAValOff, + RR_Reg, /*RR_Expr,*/ RR_ValExpr, RR_Arch } tag; + /* meaning: int offset for CFAoff/CFAValOff + reg # for Reg + expr index for Expr/ValExpr */ + Int arg; + } + RegRule; + +static void ppRegRule ( XArray* exprs, RegRule* rrule ) +{ + vg_assert(exprs); + switch (rrule->tag) { + case RR_Undef: VG_(printf)("u "); break; + case RR_Same: VG_(printf)("s "); break; + case RR_CFAOff: VG_(printf)("c%d ", rrule->arg); break; + case RR_CFAValOff: VG_(printf)("v%d ", rrule->arg); break; + case RR_Reg: VG_(printf)("r%d ", rrule->arg); break; + case RR_ValExpr: VG_(printf)("ve{"); + ML_(ppCfiExpr)( exprs, rrule->arg ); + VG_(printf)("} "); + break; + case RR_Arch: VG_(printf)("a "); break; + default: VG_(core_panic)("ppRegRule"); + } +} + + +typedef + struct { + /* Read-only fields (set by the CIE) */ + Int code_a_f; + Int data_a_f; + Addr initloc; + Int ra_reg; + /* The rest of these fields can be modifed by + run_CF_instruction. */ + /* The LOC entry */ + Addr loc; + /* The CFA entry. This can be either reg+/-offset or an expr. 
*/ + Bool cfa_is_regoff; /* True=>is reg+offset; False=>is expr */ + Int cfa_reg; + Int cfa_off; /* in bytes */ + Int cfa_expr_ix; /* index into cfa_exprs */ + /* register unwind rules */ + RegRule reg[N_CFI_REGS]; + /* array of CfiExpr, shared by reg[] and cfa_expr_ix */ + XArray* exprs; + } + UnwindContext; + +static void ppUnwindContext ( UnwindContext* ctx ) +{ + Int i; + VG_(printf)("0x%llx: ", (ULong)ctx->loc); + if (ctx->cfa_is_regoff) { + VG_(printf)("%d(r%d) ", ctx->cfa_off, ctx->cfa_reg); + } else { + vg_assert(ctx->exprs); + VG_(printf)("{"); + ML_(ppCfiExpr)( ctx->exprs, ctx->cfa_expr_ix ); + VG_(printf)("} "); + } + for (i = 0; i < N_CFI_REGS; i++) + ppRegRule(ctx->exprs, &ctx->reg[i]); + VG_(printf)("\n"); +} + +static void initUnwindContext ( /*OUT*/UnwindContext* ctx ) +{ + Int i; + ctx->code_a_f = 0; + ctx->data_a_f = 0; + ctx->initloc = 0; + ctx->ra_reg = RA_REG_DEFAULT; + ctx->loc = 0; + ctx->cfa_is_regoff = True; + ctx->cfa_reg = 0; + ctx->cfa_off = 0; + ctx->cfa_expr_ix = 0; + ctx->exprs = NULL; + for (i = 0; i < N_CFI_REGS; i++) { + ctx->reg[i].tag = RR_Undef; + ctx->reg[i].arg = 0; + } +} + + +/* A structure which holds information needed by read_encoded_Addr(). +*/ +typedef + struct { + UChar encoding; + UChar* ehframe_image; + Addr ehframe_avma; + Addr text_bias; + } + AddressDecodingInfo; + + +/* ------------ Deal with summary-info records ------------ */ + +static void initCfiSI ( DiCfSI* si ) +{ + si->base = 0; + si->len = 0; + si->cfa_how = 0; + si->ra_how = 0; + si->sp_how = 0; + si->fp_how = 0; + si->cfa_off = 0; + si->ra_off = 0; + si->sp_off = 0; + si->fp_off = 0; +} + + +/* --------------- Summarisation --------------- */ + +/* Forward */ +static +Int copy_convert_CfiExpr_tree ( XArray* dst, + UnwindContext* srcuc, + Int nd ); + +/* Summarise ctx into si, if possible. Returns True if successful. + This is taken to be just after ctx's loc advances; hence the + summary is up to but not including the current loc. 
This works + on both x86 and amd64. +*/ +static Bool summarise_context( /*OUT*/DiCfSI* si, + Addr loc_start, + UnwindContext* ctx, + struct _DebugInfo* debuginfo ) +{ + Int why = 0; + initCfiSI(si); + + /* How to generate the CFA */ + if (!ctx->cfa_is_regoff) { + /* it was set by DW_CFA_def_cfa_expression; try to convert */ + XArray *src, *dst; + Int conv; + src = ctx->exprs; + dst = debuginfo->cfsi_exprs; + if (src && (VG_(sizeXA)(src) > 0) && (!dst)) { + dst = VG_(newXA)( ML_(dinfo_zalloc), "di.ccCt.1", ML_(dinfo_free), + sizeof(CfiExpr) ); + vg_assert(dst); + debuginfo->cfsi_exprs = dst; + } + conv = copy_convert_CfiExpr_tree + ( dst, ctx, ctx->cfa_expr_ix ); + vg_assert(conv >= -1); + if (conv == -1) { why = 6; goto failed; } + si->cfa_how = CFIC_EXPR; + si->cfa_off = conv; + if (0 && debuginfo->ddump_frames) + ML_(ppCfiExpr)(dst, conv); + } else + if (ctx->cfa_is_regoff && ctx->cfa_reg == SP_REG) { + si->cfa_how = CFIC_SPREL; + si->cfa_off = ctx->cfa_off; + } else + if (ctx->cfa_is_regoff && ctx->cfa_reg == FP_REG) { + si->cfa_how = CFIC_FPREL; + si->cfa_off = ctx->cfa_off; + } else { + why = 1; + goto failed; + } + +# define SUMMARISE_HOW(_how, _off, _ctxreg) \ + switch (_ctxreg.tag) { \ + case RR_Undef: \ + _how = CFIR_UNKNOWN; _off = 0; break; \ + case RR_Same: \ + _how = CFIR_SAME; _off = 0; break; \ + case RR_CFAOff: \ + _how = CFIR_MEMCFAREL; _off = _ctxreg.arg; break; \ + case RR_CFAValOff: \ + _how = CFIR_CFAREL; _off = _ctxreg.arg; break; \ + case RR_ValExpr: { \ + XArray *src, *dst; \ + Int conv; \ + src = ctx->exprs; \ + dst = debuginfo->cfsi_exprs; \ + if (src && (VG_(sizeXA)(src) > 0) && (!dst)) { \ + dst = VG_(newXA)( ML_(dinfo_zalloc), \ + "di.ccCt.2", \ + ML_(dinfo_free), \ + sizeof(CfiExpr) ); \ + vg_assert(dst); \ + debuginfo->cfsi_exprs = dst; \ + } \ + conv = copy_convert_CfiExpr_tree \ + ( dst, ctx, _ctxreg.arg ); \ + vg_assert(conv >= -1); \ + if (conv == -1) { why = 7; goto failed; } \ + _how = CFIR_EXPR; \ + _off = conv; \ + if (0 && 
debuginfo->ddump_frames) \ + ML_(ppCfiExpr)(dst, conv); \ + break; \ + } \ + default: \ + why = 2; goto failed; /* otherwise give up */ \ + } + + SUMMARISE_HOW(si->ra_how, si->ra_off, ctx->reg[ctx->ra_reg] ); + SUMMARISE_HOW(si->fp_how, si->fp_off, ctx->reg[FP_REG] ); + +# undef SUMMARISE_HOW + + /* on x86/amd64, it seems the old %{e,r}sp value before the call is + always the same as the CFA. Therefore ... */ + si->sp_how = CFIR_CFAREL; + si->sp_off = 0; + + /* also, gcc says "Undef" for %{e,r}bp when it is unchanged. So + .. */ + if (ctx->reg[FP_REG].tag == RR_Undef) + si->fp_how = CFIR_SAME; + + /* knock out some obviously stupid cases */ + if (si->ra_how == CFIR_SAME) + { why = 3; goto failed; } + + /* bogus looking range? Note, we require that the difference is + representable in 32 bits. */ + if (loc_start >= ctx->loc) + { why = 4; goto failed; } + if (ctx->loc - loc_start > 10000000 /* let's say */) + { why = 5; goto failed; } + + si->base = loc_start + ctx->initloc; + si->len = (UInt)(ctx->loc - loc_start); + + return True; + + failed: + if (VG_(clo_verbosity) > 2 || debuginfo->trace_cfi) { + VG_(message)(Vg_DebugMsg, + "summarise_context(loc_start = %#lx)" + ": cannot summarise(why=%d): ", loc_start, why); + ppUnwindContext(ctx); + } + return False; +} + +/* Copy the tree rooted at srcuc->exprs node srcix to dstxa, on the + way converting any DwReg regs (regs numbered using the Dwarf scheme + defined by each architecture's ABI) into CfiRegs, which are + platform independent. If the conversion isn't possible because + there is no equivalent register, return -1. This has the + undesirable side effect of de-dagifying the input; oh well. 
*/ +static Int copy_convert_CfiExpr_tree ( XArray* dstxa, + UnwindContext* srcuc, + Int srcix ) +{ + CfiExpr* src; + Int cpL, cpR, cpA, dwreg; + XArray* srcxa = srcuc->exprs; + vg_assert(srcxa); + vg_assert(dstxa); + vg_assert(srcix >= 0 && srcix < VG_(sizeXA)(srcxa)); + + src = VG_(indexXA)( srcxa, srcix ); + switch (src->tag) { + case Cex_Undef: + return ML_(CfiExpr_Undef)( dstxa ); + case Cex_Deref: + cpA = copy_convert_CfiExpr_tree( dstxa, srcuc, src->Cex.Deref.ixAddr ); + if (cpA == -1) + return -1; /* propagate failure */ + return ML_(CfiExpr_Deref)( dstxa, cpA ); + case Cex_Const: + return ML_(CfiExpr_Const)( dstxa, src->Cex.Const.con ); + case Cex_Binop: + cpL = copy_convert_CfiExpr_tree( dstxa, srcuc, src->Cex.Binop.ixL ); + cpR = copy_convert_CfiExpr_tree( dstxa, srcuc, src->Cex.Binop.ixR ); + vg_assert(cpL >= -1 && cpR >= -1); + if (cpL == -1 || cpR == -1) + return -1; /* propagate failure */ + return ML_(CfiExpr_Binop)( dstxa, src->Cex.Binop.op, cpL, cpR ); + case Cex_CfiReg: + /* should not see these in input (are created only by this + conversion step!) */ + VG_(core_panic)("copy_convert_CfiExpr_tree: CfiReg in input"); + case Cex_DwReg: + /* This is the only place where the conversion can fail. */ + dwreg = src->Cex.DwReg.reg; + if (dwreg == SP_REG) + return ML_(CfiExpr_CfiReg)( dstxa, Creg_SP ); + if (dwreg == FP_REG) + return ML_(CfiExpr_CfiReg)( dstxa, Creg_FP ); + if (dwreg == srcuc->ra_reg) + return ML_(CfiExpr_CfiReg)( dstxa, Creg_IP ); /* correct? 
*/ + /* else we must fail - can't represent the reg */ + return -1; + default: + VG_(core_panic)("copy_convert_CfiExpr_tree: default"); + } +} + + +static void ppUnwindContext_summary ( UnwindContext* ctx ) +{ + VG_(printf)("0x%llx-1: ", (ULong)ctx->loc); + + if (ctx->cfa_reg == SP_REG) { + VG_(printf)("SP/CFA=%d+SP ", ctx->cfa_off); + } else + if (ctx->cfa_reg == FP_REG) { + VG_(printf)("SP/CFA=%d+FP ", ctx->cfa_off); + } else { + VG_(printf)("SP/CFA=unknown "); + } + + VG_(printf)("RA="); + ppRegRule( ctx->exprs, &ctx->reg[ctx->ra_reg] ); + + VG_(printf)("FP="); + ppRegRule( ctx->exprs, &ctx->reg[FP_REG] ); + VG_(printf)("\n"); +} + + +/* ------------ Pick apart DWARF2 byte streams ------------ */ + +static inline Bool host_is_little_endian ( void ) +{ + UInt x = 0x76543210; + UChar* p = (UChar*)(&x); + return toBool(*p == 0x10); +} + +static Short read_Short ( UChar* data ) +{ + Short r = 0; + vg_assert(host_is_little_endian()); + r = data[0] + | ( ((UInt)data[1]) << 8 ); + return r; +} + +static Int read_Int ( UChar* data ) +{ + Int r = 0; + vg_assert(host_is_little_endian()); + r = data[0] + | ( ((UInt)data[1]) << 8 ) + | ( ((UInt)data[2]) << 16 ) + | ( ((UInt)data[3]) << 24 ); + return r; +} + +static Long read_Long ( UChar* data ) +{ + Long r = 0; + vg_assert(host_is_little_endian()); + r = data[0] + | ( ((ULong)data[1]) << 8 ) + | ( ((ULong)data[2]) << 16 ) + | ( ((ULong)data[3]) << 24 ) + | ( ((ULong)data[4]) << 32 ) + | ( ((ULong)data[5]) << 40 ) + | ( ((ULong)data[6]) << 48 ) + | ( ((ULong)data[7]) << 56 ); + return r; +} + +static UShort read_UShort ( UChar* data ) +{ + UInt r = 0; + vg_assert(host_is_little_endian()); + r = data[0] + | ( ((UInt)data[1]) << 8 ); + return r; +} + +static UInt read_UInt ( UChar* data ) +{ + UInt r = 0; + vg_assert(host_is_little_endian()); + r = data[0] + | ( ((UInt)data[1]) << 8 ) + | ( ((UInt)data[2]) << 16 ) + | ( ((UInt)data[3]) << 24 ); + return r; +} + +static ULong read_ULong ( UChar* data ) +{ + ULong r = 0; + 
vg_assert(host_is_little_endian()); + r = data[0] + | ( ((ULong)data[1]) << 8 ) + | ( ((ULong)data[2]) << 16 ) + | ( ((ULong)data[3]) << 24 ) + | ( ((ULong)data[4]) << 32 ) + | ( ((ULong)data[5]) << 40 ) + | ( ((ULong)data[6]) << 48 ) + | ( ((ULong)data[7]) << 56 ); + return r; +} + +static UChar read_UChar ( UChar* data ) +{ + return data[0]; +} + +static ULong read_le_u_encoded_literal ( UChar* data, UInt size ) +{ + switch (size) { + case 8: return (ULong)read_ULong( data ); + case 4: return (ULong)read_UInt( data ); + case 2: return (ULong)read_UShort( data ); + case 1: return (ULong)read_UChar( data ); + default: vg_assert(0); /*NOTREACHED*/ return 0; + } +} + +static Long read_le_s_encoded_literal ( UChar* data, UInt size ) +{ + Long s64 = read_le_u_encoded_literal( data, size ); + switch (size) { + case 8: break; + case 4: s64 <<= 32; s64 >>= 32; break; + case 2: s64 <<= 48; s64 >>= 48; break; + case 1: s64 <<= 56; s64 >>= 56; break; + default: vg_assert(0); /*NOTREACHED*/ return 0; + } + return s64; +} + +static UChar default_Addr_encoding ( void ) +{ + switch (sizeof(Addr)) { + case 4: return DW_EH_PE_udata4; + case 8: return DW_EH_PE_udata8; + default: vg_assert(0); + } +} + +static UInt size_of_encoded_Addr ( UChar encoding ) +{ + if (encoding == DW_EH_PE_omit) + return 0; + + switch (encoding & 0x07) { + case DW_EH_PE_absptr: return sizeof(Addr); + case DW_EH_PE_udata2: return sizeof(UShort); + case DW_EH_PE_udata4: return sizeof(UInt); + case DW_EH_PE_udata8: return sizeof(ULong); + default: vg_assert(0); + } +} + +static Addr read_encoded_Addr ( /*OUT*/Int* nbytes, + AddressDecodingInfo* adi, + UChar* data ) +{ + /* Regarding the handling of DW_EH_PE_absptr. DWARF3 says this + denotes an absolute address, hence you would think 'base' is + zero. However, that is nonsensical (unless relocations are to + be applied to the unwind data before reading it, which sounds + unlikely). 
My interpretation is that DW_EH_PE_absptr indicates + an address relative to where the object was loaded (technically, + relative to its stated load VMA, hence the use of text_bias + rather than text_avma). Hmm, should we use text_bias or + text_avma here? Not sure. + + This view appears to be supported by DWARF3 spec sec 7.3 + "Executable Objects and Shared Objects": + + This requirement makes the debugging information for shared + objects position independent. Virtual addresses in a shared + object may be calculated by adding the offset to the base + address at which the object was attached. This offset is + available in the run-time linker's data structures. + */ + Addr base; + Word offset; + UChar encoding = adi->encoding; + UChar* ehframe_image = adi->ehframe_image; + Addr ehframe_avma = adi->ehframe_avma; + + vg_assert((encoding & DW_EH_PE_indirect) == 0); + + *nbytes = 0; + + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = adi->text_bias; + break; + case DW_EH_PE_pcrel: + base = ehframe_avma + ( data - ehframe_image ); + break; + case DW_EH_PE_datarel: + vg_assert(0); + base = /* data base address */ 0; + break; + case DW_EH_PE_textrel: + vg_assert(0); + base = /* text base address */ 0; + break; + case DW_EH_PE_funcrel: + base = 0; + break; + case DW_EH_PE_aligned: + base = 0; + offset = data - ehframe_image; + if ((offset % sizeof(Addr)) != 0) { + *nbytes = sizeof(Addr) - (offset % sizeof(Addr)); + data += *nbytes; + } + break; + default: + vg_assert(0); + } + + if ((encoding & 0x07) == 0x00) + encoding |= default_Addr_encoding(); + + switch (encoding & 0x0f) { + case DW_EH_PE_udata2: + *nbytes += sizeof(UShort); + return base + read_UShort(data); + case DW_EH_PE_udata4: + *nbytes += sizeof(UInt); + return base + read_UInt(data); + case DW_EH_PE_udata8: + *nbytes += sizeof(ULong); + return base + read_ULong(data); + case DW_EH_PE_sdata2: + *nbytes += sizeof(Short); + return base + read_Short(data); + case DW_EH_PE_sdata4: + *nbytes += 
sizeof(Int); + return base + read_Int(data); + case DW_EH_PE_sdata8: + *nbytes += sizeof(Long); + return base + read_Long(data); + default: + vg_assert2(0, "read encoded address %d\n", encoding & 0x0f); + } +} + + +/* ------------ Run/show DWARF3 expressions ---------- */ + +/* Convert the DWARF3 expression in expr[0 .. exprlen-1] into a dag + (of CfiExprs) stored in ctx->exprs, and return the index in + ctx->exprs of the root node. Or fail in which case return -1. */ +/* IMPORTANT: when adding expression forms here, also remember to + add suitable evaluation code in evalCfiExpr in debuginfo.c. */ +static Int dwarfexpr_to_dag ( UnwindContext* ctx, + UChar* expr, Int exprlen, + Bool push_cfa_at_start, + Bool ddump_frames ) +{ +# define N_EXPR_STACK 20 + +# define PUSH(_arg) \ + do { \ + vg_assert(sp >= -1 && sp < N_EXPR_STACK); \ + if (sp == N_EXPR_STACK-1) \ + return -1; \ + sp++; \ + stack[sp] = (_arg); \ + } while (0) + +# define POP(_lval) \ + do { \ + vg_assert(sp >= -1 && sp < N_EXPR_STACK); \ + if (sp == -1) \ + return -1; \ + _lval = stack[sp]; \ + sp--; \ + } while (0) + + Int ix, ix2, reg; + UChar opcode; + Word sw; + UWord uw; + CfiOp op; + HChar* opname; + + Int sp; /* # of top element: valid is -1 .. 
N_EXPR_STACK-1 */ + Int stack[N_EXPR_STACK]; /* indices into ctx->exprs */ + + XArray* dst = ctx->exprs; + UChar* limit = expr + exprlen; + + vg_assert(dst); + vg_assert(exprlen >= 0); + + sp = -1; /* empty */ + + /* Synthesise the CFA as a CfiExpr */ + if (push_cfa_at_start) { + if (ctx->cfa_is_regoff) { + /* cfa is reg +/- offset */ + ix = ML_(CfiExpr_Binop)( dst, + Cop_Add, + ML_(CfiExpr_DwReg)( dst, ctx->cfa_reg ), + ML_(CfiExpr_Const)( dst, (UWord)(Word)ctx->cfa_off ) + ); + PUSH(ix); + } else { + /* CFA is already an expr; use its root node */ + PUSH(ctx->cfa_expr_ix); + } + } + + while (True) { + + vg_assert(sp >= -1 && sp < N_EXPR_STACK); + + if (expr > limit) + return -1; /* overrun - something's wrong */ + + if (expr == limit) { + /* end of expr - return expr on the top of stack. */ + if (sp == -1) + return -1; /* stack empty. Bad. */ + else + break; + } + + op = 0; opname = NULL; /* excessively conservative */ + + opcode = *expr++; + switch (opcode) { + + case DW_OP_lit0 ... DW_OP_lit31: + /* push: literal 0 .. 31 */ + sw = (Word)opcode - (Word)DW_OP_lit0; + vg_assert(sw >= 0 && sw <= 31); + PUSH( ML_(CfiExpr_Const)( dst, (UWord)sw ) ); + if (ddump_frames) + VG_(printf)("DW_OP_lit%ld", sw); + break; + + case DW_OP_breg0 ... DW_OP_breg31: + /* push: reg + sleb128 */ + reg = (Int)opcode - (Int)DW_OP_breg0; + vg_assert(reg >= 0 && reg <= 31); + sw = read_leb128S( &expr ); + ix = ML_(CfiExpr_Binop)( dst, + Cop_Add, + ML_(CfiExpr_DwReg)( dst, reg ), + ML_(CfiExpr_Const)( dst, (UWord)sw ) + ); + PUSH(ix); + if (ddump_frames) + VG_(printf)("DW_OP_breg%d: %ld", reg, sw); + break; + + case DW_OP_reg0 ... 
DW_OP_reg31: + /* push: reg */ + reg = (Int)opcode - (Int)DW_OP_reg0; + vg_assert(reg >= 0 && reg <= 31); + ix = ML_(CfiExpr_DwReg)( dst, reg ); + PUSH(ix); + if (ddump_frames) + VG_(printf)("DW_OP_reg%d", reg); + break; + + case DW_OP_plus_uconst: + uw = read_leb128U( &expr ); + PUSH( ML_(CfiExpr_Const)( dst, uw ) ); + POP( ix ); + POP( ix2 ); + PUSH( ML_(CfiExpr_Binop)( dst, op, ix2, ix ) ); + if (ddump_frames) + VG_(printf)("DW_OP_plus_uconst: %lu", uw); + break; + + case DW_OP_const4s: + /* push: 32-bit signed immediate */ + sw = read_le_s_encoded_literal( expr, 4 ); + expr += 4; + PUSH( ML_(CfiExpr_Const)( dst, (UWord)sw ) ); + if (ddump_frames) + VG_(printf)("DW_OP_const4s: %ld", sw); + break; + + case DW_OP_const1s: + /* push: 8-bit signed immediate */ + sw = read_le_s_encoded_literal( expr, 1 ); + expr += 1; + PUSH( ML_(CfiExpr_Const)( dst, (UWord)sw ) ); + if (ddump_frames) + VG_(printf)("DW_OP_const1s: %ld", sw); + break; + + case DW_OP_minus: + op = Cop_Sub; opname = "minus"; goto binop; + case DW_OP_plus: + op = Cop_Add; opname = "plus"; goto binop; + case DW_OP_and: + op = Cop_And; opname = "and"; goto binop; + case DW_OP_mul: + op = Cop_Mul; opname = "mul"; goto binop; + binop: + POP( ix ); + POP( ix2 ); + PUSH( ML_(CfiExpr_Binop)( dst, op, ix2, ix ) ); + if (ddump_frames) + VG_(printf)("DW_OP_%s", opname); + break; + + case DW_OP_deref: + POP( ix ); + PUSH( ML_(CfiExpr_Deref)( dst, ix ) ); + if (ddump_frames) + VG_(printf)("DW_OP_deref"); + break; + + default: + if (!VG_(clo_xml)) + VG_(message)(Vg_DebugMsg, + "Warning: DWARF2 CFI reader: unhandled DW_OP_ " + "opcode 0x%x", (Int)opcode); + return -1; + } + + if (expr < limit && ddump_frames) + VG_(printf)("; "); + + } + + vg_assert(sp >= -1 && sp < N_EXPR_STACK); + if (sp == -1) + return -1; + + if (0 && ddump_frames) + ML_(ppCfiExpr)( dst, stack[sp] ); + return stack[sp]; + +# undef POP +# undef PUSH +# undef N_EXPR_STACK +} + + +/* ------------ Run/show CFI instructions ------------ */ + +/* Run a 
CFI instruction, and also return its length.
   Returns 0 if the instruction could not be executed.
*/
/* 'instr' points at the instruction's first byte.  The top two bits
   of that byte select one of the three primary instructions, or (if
   zero) say that the low six bits hold a secondary opcode.

   ctx is updated in place.  restore_ctx supplies the register rules
   for DW_CFA_restore and DW_CFA_restore_extended; it may be NULL, in
   which case those two instructions fail.  adi is used to decode the
   operand of DW_CFA_set_loc.  If di->ddump_frames is set, each
   instruction is also printed.

   The instruction fails (0 is returned) if it names a register
   outside 0 .. N_CFI_REGS-1, if an expression operand cannot be
   converted by dwarfexpr_to_dag, or if the opcode is not handled.
   All DW_CFA_advance_loc* deltas are scaled by ctx->code_a_f, as the
   DWARF specification requires. */
static Int run_CF_instruction ( /*MOD*/UnwindContext* ctx,
                                UChar* instr,
                                UnwindContext* restore_ctx,
                                AddressDecodingInfo* adi,
                                struct _DebugInfo* di )
{
   /* Read an unsigned (_is_signed == 0) or signed (1) LEB128 operand
      into _lval and step over it. */
#  define READ_LEB(_lval, _is_signed)                             \
      do {                                                        \
         _lval = read_leb128( &instr[i], &nleb, (_is_signed) );   \
         i += nleb;                                               \
      } while (0)

   /* Fail the instruction if _r is not a register we track. */
#  define FAIL_UNLESS_VALID_REG(_r)                               \
      do {                                                        \
         if ((_r) < 0 || (_r) >= N_CFI_REGS)                      \
            return 0; /* fail */                                  \
      } while (0)

   Int    off, reg, reg2, nleb, len;
   UInt   delta;
   UChar* expr;
   Int    j;
   Int    i   = 0;
   UChar  hi2 = (instr[i] >> 6) & 3;
   UChar  lo6 = instr[i] & 0x3F;
   /* added to ctx->loc when printing locations */
   Addr   printing_bias = ((Addr)ctx->initloc) - ((Addr)di->text_bias);
   i++;

   if (hi2 == DW_CFA_advance_loc) {
      delta = (UInt)lo6;
      delta *= ctx->code_a_f;
      ctx->loc += delta;
      if (di->ddump_frames)
         VG_(printf)(" DW_CFA_advance_loc: %d to %08lx\n",
                     (Int)delta, (Addr)ctx->loc + printing_bias);
      return i;
   }

   if (hi2 == DW_CFA_offset) {
      /* Set rule for reg 'lo6' to CFAOff(off * data_af) */
      READ_LEB(off, 0);
      reg = (Int)lo6;
      FAIL_UNLESS_VALID_REG(reg);
      ctx->reg[reg].tag = RR_CFAOff;
      ctx->reg[reg].arg = off * ctx->data_a_f;
      if (di->ddump_frames)
         VG_(printf)(" DW_CFA_offset: r%d at cfa%s%d\n",
                     (Int)reg, ctx->reg[reg].arg < 0 ? "" : "+",
                     (Int)ctx->reg[reg].arg );
      return i;
   }

   if (hi2 == DW_CFA_restore) {
      reg = (Int)lo6;
      FAIL_UNLESS_VALID_REG(reg);
      if (restore_ctx == NULL)
         return 0; /* fail */
      ctx->reg[reg] = restore_ctx->reg[reg];
      if (di->ddump_frames)
         VG_(printf)(" DW_CFA_restore: r%d\n", (Int)reg);
      return i;
   }

   vg_assert(hi2 == DW_CFA_use_secondary);

   switch (lo6) {
      case DW_CFA_nop:
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_nop\n");
         break;
      case DW_CFA_set_loc:
         /* WAS:
            ctx->loc = read_Addr(&instr[i]) - ctx->initloc; i+= sizeof(Addr);
            Was this ever right? */
         /* 2007 Feb 23: No.  binutils/dwarf.c treats it as an encoded
            address and that appears to be in accordance with the
            DWARF3 spec. */
         ctx->loc = read_encoded_Addr(&len, adi, &instr[i]);
         i += len;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_set_loc\n");
         break;
      case DW_CFA_advance_loc1:
         delta = (UInt)read_UChar(&instr[i]); i+= sizeof(UChar);
         delta *= ctx->code_a_f;
         ctx->loc += delta;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_advance_loc1: %d to %08lx\n",
                        (Int)delta, (Addr)ctx->loc + printing_bias);
         break;
      case DW_CFA_advance_loc2:
         delta = (UInt)read_UShort(&instr[i]); i+= sizeof(UShort);
         delta *= ctx->code_a_f;
         ctx->loc += delta;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_advance_loc2: %d to %08lx\n",
                        (Int)delta, (Addr)ctx->loc + printing_bias);
         break;
      case DW_CFA_advance_loc4:
         delta = (UInt)read_UInt(&instr[i]); i+= sizeof(UInt);
         delta *= ctx->code_a_f;
         ctx->loc += delta;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_advance_loc4: %d to %08lx\n",
                        (Int)delta, (Addr)ctx->loc + printing_bias);
         break;

      case DW_CFA_def_cfa:
         READ_LEB(reg, 0);
         READ_LEB(off, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->cfa_is_regoff = True;
         ctx->cfa_expr_ix   = 0;
         ctx->cfa_reg       = reg;
         ctx->cfa_off       = off;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_def_cfa: r%d ofs %d\n", (Int)reg, (Int)off);
         break;

      case DW_CFA_def_cfa_sf:
         READ_LEB(reg, 0);
         READ_LEB(off, 1);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->cfa_is_regoff = True;
         ctx->cfa_expr_ix   = 0;
         ctx->cfa_reg       = reg;
         ctx->cfa_off       = off * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_def_cfa_sf\n");
         break;

      case DW_CFA_register:
         READ_LEB(reg, 0);
         READ_LEB(reg2, 0);
         FAIL_UNLESS_VALID_REG(reg);
         FAIL_UNLESS_VALID_REG(reg2);
         ctx->reg[reg].tag = RR_Reg;
         ctx->reg[reg].arg = reg2;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_register: r%d in r%d\n",
                        (Int)reg, (Int)reg2);
         break;

      case DW_CFA_offset_extended:
         READ_LEB(reg, 0);
         READ_LEB(off, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_CFAOff;
         ctx->reg[reg].arg = off * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_offset_extended\n");
         break;

      case DW_CFA_offset_extended_sf:
         READ_LEB(reg, 0);
         READ_LEB(off, 1);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_CFAOff;
         ctx->reg[reg].arg = off * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_offset_extended_sf: r%d at cfa%s%d\n",
                        reg, ctx->reg[reg].arg < 0 ? "" : "+",
                        (Int)ctx->reg[reg].arg);
         break;

      case DW_CFA_GNU_negative_offset_extended:
         READ_LEB(reg, 0);
         READ_LEB(off, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_CFAOff;
         ctx->reg[reg].arg = (-off) * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_GNU_negative_offset_extended\n");
         break;

      case DW_CFA_restore_extended:
         READ_LEB(reg, 0);
         FAIL_UNLESS_VALID_REG(reg);
         if (restore_ctx == NULL)
            return 0; /* fail */
         ctx->reg[reg] = restore_ctx->reg[reg];
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_restore_extended\n");
         break;

      case DW_CFA_val_offset:
         READ_LEB(reg, 0);
         READ_LEB(off, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_CFAValOff;
         ctx->reg[reg].arg = off * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_val_offset\n");
         break;

      case DW_CFA_val_offset_sf:
         READ_LEB(reg, 0);
         READ_LEB(off, 1);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_CFAValOff;
         ctx->reg[reg].arg = off * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_val_offset_sf\n");
         break;

      case DW_CFA_def_cfa_register:
         READ_LEB(reg, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->cfa_is_regoff = True;
         ctx->cfa_expr_ix   = 0;
         ctx->cfa_reg       = reg;
         /* ->cfa_off unchanged */
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_def_cfa_reg: r%d\n", (Int)reg );
         break;

      case DW_CFA_def_cfa_offset:
         READ_LEB(off, 0);
         ctx->cfa_is_regoff = True;
         ctx->cfa_expr_ix   = 0;
         /* ->reg is unchanged */
         ctx->cfa_off       = off;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_def_cfa_offset: %d\n", (Int)off);
         break;

      case DW_CFA_def_cfa_offset_sf:
         READ_LEB(off, 1);
         ctx->cfa_is_regoff = True;
         ctx->cfa_expr_ix   = 0;
         /* ->reg is unchanged */
         ctx->cfa_off       = off * ctx->data_a_f;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_def_cfa_offset_sf: %d\n", ctx->cfa_off);
         break;

      case DW_CFA_undefined:
         READ_LEB(reg, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_Undef;
         ctx->reg[reg].arg = 0;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_undefined\n");
         break;

      case DW_CFA_same_value:
         READ_LEB(reg, 0);
         FAIL_UNLESS_VALID_REG(reg);
         ctx->reg[reg].tag = RR_Same;
         ctx->reg[reg].arg = 0;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_same_value\n");
         break;

      case DW_CFA_GNU_args_size:
         /* No idea what is supposed to happen.  gdb-6.3 simply
            ignores these. */
         /*off = */ read_leb128( &instr[i], &nleb, 0 );
         i += nleb;
         if (di->ddump_frames)
            VG_(printf)(" rci:DW_CFA_GNU_args_size (ignored)\n");
         break;

      case DW_CFA_expression:
         /* Identical to DW_CFA_val_expression except that the value
            computed is an address and so needs one final
            dereference. */
         READ_LEB(reg, 0);
         READ_LEB(len, 0);
         expr = &instr[i];
         i += len;
         FAIL_UNLESS_VALID_REG(reg);
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_expression: r%d (",
                        (Int)reg);
         /* Convert the expression into a dag rooted at ctx->exprs index j,
            or fail. */
         j = dwarfexpr_to_dag ( ctx, expr, len, True/*push CFA at start*/,
                                di->ddump_frames);
         if (di->ddump_frames)
            VG_(printf)(")\n");
         vg_assert(j >= -1);
         if (j >= 0) {
            vg_assert(ctx->exprs);
            vg_assert( j < VG_(sizeXA)(ctx->exprs) );
         }
         if (j == -1)
            return 0; /* fail */
         /* Add an extra dereference */
         j = ML_(CfiExpr_Deref)( ctx->exprs, j );
         ctx->reg[reg].tag = RR_ValExpr;
         ctx->reg[reg].arg = j;
         break;

      case DW_CFA_val_expression:
         READ_LEB(reg, 0);
         READ_LEB(len, 0);
         expr = &instr[i];
         i += len;
         FAIL_UNLESS_VALID_REG(reg);
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_val_expression: r%d (",
                        (Int)reg);
         /* Convert the expression into a dag rooted at ctx->exprs index j,
            or fail. */
         j = dwarfexpr_to_dag ( ctx, expr, len, True/*push CFA at start*/,
                                di->ddump_frames);
         if (di->ddump_frames)
            VG_(printf)(")\n");
         vg_assert(j >= -1);
         if (j >= 0) {
            vg_assert(ctx->exprs);
            vg_assert( j < VG_(sizeXA)(ctx->exprs) );
         }
         if (j == -1)
            return 0; /* fail */
         ctx->reg[reg].tag = RR_ValExpr;
         ctx->reg[reg].arg = j;
         break;

      case DW_CFA_def_cfa_expression:
         READ_LEB(len, 0);
         expr = &instr[i];
         i += len;
         if (di->ddump_frames)
            VG_(printf)(" DW_CFA_def_cfa_expression (");
         /* Convert the expression into a dag rooted at ctx->exprs index j,
            or fail. */
         j = dwarfexpr_to_dag ( ctx, expr, len, True/*push CFA at start*/,
                                di->ddump_frames);
         if (di->ddump_frames)
            VG_(printf)(")\n");
         vg_assert(j >= -1);
         if (j >= 0) {
            vg_assert(ctx->exprs);
            vg_assert( j < VG_(sizeXA)(ctx->exprs) );
         }
         /* Do not store a failed conversion as the CFA expression:
            -1 is not a valid index into ctx->exprs, and
            copy_convert_CfiExpr_tree asserts on it. */
         if (j == -1)
            return 0; /* fail */
         ctx->cfa_is_regoff = False;
         ctx->cfa_reg       = 0;
         ctx->cfa_off       = 0;
         ctx->cfa_expr_ix   = j;
         break;

      case DW_CFA_GNU_window_save:
         /* Ignored.  This appears to be sparc-specific; quite why it
            turns up in SuSE-supplied x86 .so's beats me. */
         if (di->ddump_frames)
            VG_(printf)("DW_CFA_GNU_window_save\n");
         break;

      default:
         VG_(message)(Vg_DebugMsg, "DWARF2 CFI reader: unhandled CFI "
                                   "instruction 0:%d", (Int)lo6);
         if (di->ddump_frames)
            VG_(printf)(" rci:run_CF_instruction:default\n");
         i = 0;
         break;
   }

   return i;

#  undef FAIL_UNLESS_VALID_REG
#  undef READ_LEB
}


/* Show a CFI instruction, and also return its length.  Show it as
   close as possible (preferably identical) to how GNU binutils
   readelf --debug-dump=frames would.
*/ + +static Int show_CF_instruction ( UChar* instr, + AddressDecodingInfo* adi, + Int code_a_f, Int data_a_f ) +{ + UInt delta; + Int off, coff, reg, reg2, nleb, len; + Addr loc; + Int i = 0; + UChar hi2 = (instr[i] >> 6) & 3; + UChar lo6 = instr[i] & 0x3F; + i++; + + if (0) VG_(printf)("raw:%x/%x:%x:%x:%x:%x:%x:%x:%x:%x\n", + hi2, lo6, + instr[i+0], instr[i+1], instr[i+2], instr[i+3], + instr[i+4], instr[i+5], instr[i+6], instr[i+7] ); + + if (hi2 == DW_CFA_advance_loc) { + VG_(printf)(" sci:DW_CFA_advance_loc(%d)\n", (Int)lo6); + return i; + } + + if (hi2 == DW_CFA_offset) { + off = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + coff = off * data_a_f; + VG_(printf)(" DW_CFA_offset: r%d at cfa%s%d\n", + (Int)lo6, coff < 0 ? "" : "+", (Int)coff ); + return i; + } + + if (hi2 == DW_CFA_restore) { + VG_(printf)(" sci:DW_CFA_restore(r%d)\n", (Int)lo6); + return i; + } + + vg_assert(hi2 == DW_CFA_use_secondary); + + switch (lo6) { + + case DW_CFA_nop: + VG_(printf)(" DW_CFA_nop\n"); + break; + + case DW_CFA_set_loc: + /* WAS: loc = read_Addr(&instr[i]); i+= sizeof(Addr); + (now known to be incorrect -- the address is encoded) */ + loc = read_encoded_Addr(&len, adi, &instr[i]); + i += len; + VG_(printf)(" sci:DW_CFA_set_loc(%#lx)\n", loc); + break; + + case DW_CFA_advance_loc1: + delta = (UInt)read_UChar(&instr[i]); i+= sizeof(UChar); + VG_(printf)(" sci:DW_CFA_advance_loc1(%d)\n", delta); + break; + + case DW_CFA_advance_loc2: + delta = (UInt)read_UShort(&instr[i]); i+= sizeof(UShort); + VG_(printf)(" sci:DW_CFA_advance_loc2(%d)\n", delta); + break; + + case DW_CFA_advance_loc4: + delta = (UInt)read_UInt(&instr[i]); i+= sizeof(UInt); + VG_(printf)(" DW_CFA_advance_loc4(%d)\n", delta); + break; + + case DW_CFA_def_cfa: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + VG_(printf)(" DW_CFA_def_cfa: r%d ofs %d\n", (Int)reg, (Int)off); + break; + + case DW_CFA_def_cfa_sf: + reg = read_leb128( &instr[i], 
&nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 1 ); + i += nleb; + VG_(printf)(" DW_CFA_def_cfa_sf: r%d ofs %d\n", + (Int)reg, (Int)(off * data_a_f)); + break; + + case DW_CFA_register: + reg = read_leb128( &instr[i], &nleb, 0); + i += nleb; + reg2 = read_leb128( &instr[i], &nleb, 0); + i += nleb; + VG_(printf)(" sci:DW_CFA_register(r%d, r%d)\n", reg, reg2); + break; + + case DW_CFA_def_cfa_register: + reg = read_leb128( &instr[i], &nleb, 0); + i += nleb; + VG_(printf)(" sci:DW_CFA_def_cfa_register(r%d)\n", reg); + break; + + case DW_CFA_def_cfa_offset: + off = read_leb128( &instr[i], &nleb, 0); + i += nleb; + VG_(printf)(" sci:DW_CFA_def_cfa_offset(%d)\n", off); + break; + + case DW_CFA_def_cfa_offset_sf: + off = read_leb128( &instr[i], &nleb, 1); + i += nleb; + VG_(printf)(" sci:DW_CFA_def_cfa_offset_sf(%d)\n", off); + break; + + case DW_CFA_restore_extended: + reg = read_leb128( &instr[i], &nleb, 0); + i += nleb; + VG_(printf)(" sci:DW_CFA_restore_extended(r%d)\n", reg); + break; + + case DW_CFA_undefined: + reg = read_leb128( &instr[i], &nleb, 0); + i += nleb; + VG_(printf)(" sci:DW_CFA_undefined(r%d)\n", reg); + break; + + case DW_CFA_same_value: + reg = read_leb128( &instr[i], &nleb, 0); + i += nleb; + VG_(printf)(" sci:DW_CFA_same_value(r%d)\n", reg); + break; + + case DW_CFA_remember_state: + VG_(printf)(" sci:DW_CFA_remember_state\n"); + break; + + case DW_CFA_restore_state: + VG_(printf)(" sci:DW_CFA_restore_state\n"); + break; + + case DW_CFA_GNU_args_size: + off = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + VG_(printf)(" sci:DW_CFA_GNU_args_size(%d)\n", off ); + break; + + case DW_CFA_def_cfa_expression: + len = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + i += len; + VG_(printf)(" sci:DW_CFA_def_cfa_expression(length %d)\n", len); + break; + + case DW_CFA_expression: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + len = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + i += len; + VG_(printf)(" 
sci:DW_CFA_expression(r%d, length %d)\n", reg, len); + break; + + case DW_CFA_val_expression: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + len = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + i += len; + VG_(printf)(" sci:DW_CFA_val_expression(r%d, length %d)\n", reg, len); + break; + + case DW_CFA_offset_extended: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + VG_(printf)(" sci:DW_CFA_offset_extended(r%d, " + "off %d x data_af)\n", reg, off); + break; + + case DW_CFA_offset_extended_sf: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 1 ); + i += nleb; + coff = (Int)(off * data_a_f); + VG_(printf)(" DW_CFA_offset_extended_sf: r%d at cfa%s%d\n", + reg, coff < 0 ? "" : "+", coff); + break; + + case DW_CFA_GNU_negative_offset_extended: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + VG_(printf)(" sci:DW_CFA_GNU_negative_offset_extended" + "(r%d, off %d x data_af)\n", reg, -off); + break; + + case DW_CFA_val_offset: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + VG_(printf)(" sci:DW_CFA_val_offset(r%d, off %d x data_af)\n", + reg, off); + break; + + case DW_CFA_val_offset_sf: + reg = read_leb128( &instr[i], &nleb, 0 ); + i += nleb; + off = read_leb128( &instr[i], &nleb, 1 ); + i += nleb; + VG_(printf)(" sci:DW_CFA_val_offset_sf(r%d, off %d x data_af)\n", + reg, off); + break; + + case DW_CFA_GNU_window_save: + VG_(printf)(" sci:DW_CFA_GNU_window_save\n"); + break; + + default: + VG_(printf)(" sci:0:%d\n", (Int)lo6); + break; + } + + return i; +} + + +/* Show the instructions in instrs[0 .. ilen-1]. 
*/ +static void show_CF_instructions ( UChar* instrs, Int ilen, + AddressDecodingInfo* adi, + Int code_a_f, Int data_a_f ) +{ + Int i = 0; + while (True) { + if (i >= ilen) break; + i += show_CF_instruction( &instrs[i], adi, code_a_f, data_a_f ); + } +} + + +/* Run the CF instructions in instrs[0 .. ilen-1], until the end is + reached, or until there is a failure. Return True iff success. +*/ +static +Bool run_CF_instructions ( struct _DebugInfo* di, + Bool record, + UnwindContext* ctx, UChar* instrs, Int ilen, + UWord fde_arange, + UnwindContext* restore_ctx, + AddressDecodingInfo* adi ) +{ + DiCfSI cfsi; + Bool summ_ok; + Int j, i = 0; + Addr loc_prev; + if (0) ppUnwindContext(ctx); + if (0) ppUnwindContext_summary(ctx); + while (True) { + loc_prev = ctx->loc; + if (i >= ilen) break; + if (0) (void)show_CF_instruction( &instrs[i], adi, + ctx->code_a_f, ctx->data_a_f ); + j = run_CF_instruction( ctx, &instrs[i], restore_ctx, adi, di ); + if (j == 0) + return False; /* execution failed */ + i += j; + if (0) ppUnwindContext(ctx); + if (record && loc_prev != ctx->loc) { + summ_ok = summarise_context ( &cfsi, loc_prev, ctx, di ); + if (summ_ok) { + ML_(addDiCfSI)(di, &cfsi); + if (di->trace_cfi) + ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi); + } + } + } + if (ctx->loc < fde_arange) { + loc_prev = ctx->loc; + ctx->loc = fde_arange; + if (record) { + summ_ok = summarise_context ( &cfsi, loc_prev, ctx, di ); + if (summ_ok) { + ML_(addDiCfSI)(di, &cfsi); + if (di->trace_cfi) + ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi); + } + } + } + return True; +} + + +/* ------------ Main entry point for CFI reading ------------ */ + +typedef + struct { + /* This gives the CIE an identity to which FDEs will refer. */ + ULong offset; + /* Code, data factors. */ + Int code_a_f; + Int data_a_f; + /* Return-address pseudo-register. */ + Int ra_reg; + UChar address_encoding; + /* Where are the instrs? Note, this are simply pointers back to + the transiently-mapped-in section. 
*/ + UChar* instrs; + Int ilen; + /* God knows .. don't ask */ + Bool saw_z_augmentation; + } + CIE; + +static void init_CIE ( CIE* cie ) +{ + cie->offset = 0; + cie->code_a_f = 0; + cie->data_a_f = 0; + cie->ra_reg = 0; + cie->address_encoding = 0; + cie->instrs = NULL; + cie->ilen = 0; + cie->saw_z_augmentation = False; +} + +#define N_CIEs 2000 +static CIE the_CIEs[N_CIEs]; + + +void ML_(read_callframe_info_dwarf3) + ( /*OUT*/struct _DebugInfo* di, UChar* ehframe_image ) +{ + Int nbytes; + HChar* how = NULL; + Int n_CIEs = 0; + UChar* data = ehframe_image; + +# if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) + /* These targets don't use CFI-based stack unwinding. */ + return; +# endif + + if (di->trace_cfi) { + VG_(printf)("\n-----------------------------------------------\n"); + VG_(printf)("CFI info: szB %ld, _avma %#lx, _image %p\n", + di->ehframe_size, di->ehframe_avma, + ehframe_image ); + VG_(printf)("CFI info: name %s\n", + di->filename ); + } + + /* Loop over CIEs/FDEs */ + + /* Conceptually, the frame info is a sequence of FDEs, one for each + function. Inside an FDE is a miniature program for a special + state machine, which, when run, produces the stack-unwinding + info for that function. + + Because the FDEs typically have much in common, and because the + DWARF designers appear to have been fanatical about space + saving, the common parts are factored out into so-called CIEs. + That means that what we traverse is a sequence of structs, each + of which is either a FDE (usually) or a CIE (occasionally). + Each FDE has a field indicating which CIE is the one pertaining + to it. + + The following loop traverses the sequence. FDEs are dealt with + immediately; once we harvest the useful info in an FDE, it is + then forgotten about. By contrast, CIEs are validated and + dumped into an array, because later FDEs may refer to any + previously-seen CIE. 
+ */ + while (True) { + UChar* ciefde_start; + ULong ciefde_len; + ULong cie_pointer; + Bool dw64; + + /* Are we done? */ + if (data == ehframe_image + di->ehframe_size) + return; + + /* Overshot the end? Means something is wrong */ + if (data > ehframe_image + di->ehframe_size) { + how = "overran the end of .eh_frame"; + goto bad; + } + + /* Ok, we must be looking at the start of a new CIE or FDE. + Figure out which it is. */ + + ciefde_start = data; + if (di->trace_cfi) + VG_(printf)("\ncie/fde.start = %p (ehframe_image + 0x%lx)\n", + ciefde_start, + ciefde_start - ehframe_image + 0UL); + + ciefde_len = (ULong) read_UInt(data); data += sizeof(UInt); + if (di->trace_cfi) + VG_(printf)("cie/fde.length = %lld\n", ciefde_len); + + /* Apparently, if the .length field is zero, we are at the end + of the sequence. This is stated in the Generic Elf + Specification (see comments far above here) and is one of the + places where .eh_frame and .debug_frame data differ. */ + if (ciefde_len == 0) { + if (di->ddump_frames) + VG_(printf)("%08lx ZERO terminator\n\n", + ((Addr)ciefde_start) - ((Addr)ehframe_image)); + return; + } + + /* If the .length field is 0xFFFFFFFF then we're dealing with + 64-bit DWARF, and the real length is stored as a 64-bit + number immediately following it. */ + dw64 = False; + if (ciefde_len == 0xFFFFFFFFUL) { + dw64 = True; + ciefde_len = read_ULong(data); data += sizeof(ULong); + } + + /* Now get the CIE ID, whose size depends on the DWARF 32 vs + 64-ness. */ + if (dw64) { + cie_pointer = read_ULong(data); + data += sizeof(ULong); /* XXX see XXX below */ + } else { + cie_pointer = (ULong)read_UInt(data); + data += sizeof(UInt); /* XXX see XXX below */ + } + + if (di->trace_cfi) + VG_(printf)("cie.pointer = %lld\n", cie_pointer); + + /* If cie_pointer is zero, we've got a CIE; else it's an FDE. 
*/ + if (cie_pointer == 0) { + + Int this_CIE; + UChar cie_version; + UChar* cie_augmentation; + + /* --------- CIE --------- */ + if (di->trace_cfi) + VG_(printf)("------ new CIE (#%d of 0 .. %d) ------\n", + n_CIEs, N_CIEs - 1); + + /* Allocate a new CIE record. */ + vg_assert(n_CIEs >= 0 && n_CIEs <= N_CIEs); + if (n_CIEs == N_CIEs) { + how = "N_CIEs is too low. Increase and recompile."; + goto bad; + } + + this_CIE = n_CIEs; + n_CIEs++; + init_CIE( &the_CIEs[this_CIE] ); + + /* Record its offset. This is how we will find it again + later when looking at an FDE. */ + the_CIEs[this_CIE].offset = (ULong)(ciefde_start - ehframe_image); + + if (di->ddump_frames) + VG_(printf)("%08lx %08lx %08lx CIE\n", + ((Addr)ciefde_start) - ((Addr)ehframe_image), + (Addr)ciefde_len, + (Addr)(UWord)cie_pointer ); + + cie_version = read_UChar(data); data += sizeof(UChar); + if (di->trace_cfi) + VG_(printf)("cie.version = %d\n", (Int)cie_version); + if (di->ddump_frames) + VG_(printf)(" Version: %d\n", (Int)cie_version); + if (cie_version != 1) { + how = "unexpected CIE version (not 1)"; + goto bad; + } + + cie_augmentation = data; + data += 1 + VG_(strlen)(cie_augmentation); + if (di->trace_cfi) + VG_(printf)("cie.augment = \"%s\"\n", cie_augmentation); + if (di->ddump_frames) + VG_(printf)(" Augmentation: \"%s\"\n", cie_augmentation); + + if (cie_augmentation[0] == 'e' && cie_augmentation[1] == 'h') { + data += sizeof(Addr); + cie_augmentation += 2; + } + + the_CIEs[this_CIE].code_a_f = read_leb128( data, &nbytes, 0); + data += nbytes; + if (di->trace_cfi) + VG_(printf)("cie.code_af = %d\n", + the_CIEs[this_CIE].code_a_f); + if (di->ddump_frames) + VG_(printf)(" Code alignment factor: %d\n", + (Int)the_CIEs[this_CIE].code_a_f); + + the_CIEs[this_CIE].data_a_f = read_leb128( data, &nbytes, 1); + data += nbytes; + if (di->trace_cfi) + VG_(printf)("cie.data_af = %d\n", + the_CIEs[this_CIE].data_a_f); + if (di->ddump_frames) + VG_(printf)(" Data alignment factor: %d\n", + 
(Int)the_CIEs[this_CIE].data_a_f); + + the_CIEs[this_CIE].ra_reg = (Int)read_UChar(data); + data += sizeof(UChar); + if (di->trace_cfi) + VG_(printf)("cie.ra_reg = %d\n", + the_CIEs[this_CIE].ra_reg); + if (di->ddump_frames) + VG_(printf)(" Return address column: %d\n", + (Int)the_CIEs[this_CIE].ra_reg); + + if (the_CIEs[this_CIE].ra_reg < 0 + || the_CIEs[this_CIE].ra_reg >= N_CFI_REGS) { + how = "cie.ra_reg has implausible value"; + goto bad; + } + + the_CIEs[this_CIE].saw_z_augmentation + = *cie_augmentation == 'z'; + if (the_CIEs[this_CIE].saw_z_augmentation) { + UInt length = read_leb128( data, &nbytes, 0); + data += nbytes; + the_CIEs[this_CIE].instrs = data + length; + cie_augmentation++; + if (di->ddump_frames) { + UInt i; + VG_(printf)(" Augmentation data: "); + for (i = 0; i < length; i++) + VG_(printf)(" %02x", (UInt)data[i]); + VG_(printf)("\n"); + } + } else { + the_CIEs[this_CIE].instrs = NULL; + } + + the_CIEs[this_CIE].address_encoding = default_Addr_encoding(); + + while (*cie_augmentation) { + switch (*cie_augmentation) { + case 'L': + data++; + cie_augmentation++; + break; + case 'R': + the_CIEs[this_CIE].address_encoding + = read_UChar(data); data += sizeof(UChar); + cie_augmentation++; + break; + case 'P': + data += size_of_encoded_Addr( read_UChar(data) ); + data++; + cie_augmentation++; + break; + case 'S': + cie_augmentation++; + break; + default: + if (the_CIEs[this_CIE].instrs == NULL) { + how = "unhandled cie.augmentation"; + goto bad; + } + data = the_CIEs[this_CIE].instrs; + goto done_augmentation; + } + } + + done_augmentation: + + if (di->trace_cfi) + VG_(printf)("cie.encoding = 0x%x\n", + the_CIEs[this_CIE].address_encoding); + + the_CIEs[this_CIE].instrs = data; + the_CIEs[this_CIE].ilen + = ciefde_start + ciefde_len + sizeof(UInt) - data; + if (di->trace_cfi) { + VG_(printf)("cie.instrs = %p\n", the_CIEs[this_CIE].instrs); + VG_(printf)("cie.ilen = %d\n", the_CIEs[this_CIE].ilen); + } + + if (the_CIEs[this_CIE].ilen < 0 + || 
the_CIEs[this_CIE].ilen > di->ehframe_size) { + how = "implausible # cie initial insns"; + goto bad; + } + + data += the_CIEs[this_CIE].ilen; + + /* Show the CIE's instructions (the preamble for each FDE + that uses this CIE). */ + if (di->ddump_frames) + VG_(printf)("\n"); + + if (di->trace_cfi || di->ddump_frames) { + AddressDecodingInfo adi; + adi.encoding = the_CIEs[this_CIE].address_encoding; + adi.ehframe_image = ehframe_image; + adi.ehframe_avma = di->ehframe_avma; + adi.text_bias = di->text_debug_bias; + show_CF_instructions( the_CIEs[this_CIE].instrs, + the_CIEs[this_CIE].ilen, &adi, + the_CIEs[this_CIE].code_a_f, + the_CIEs[this_CIE].data_a_f ); + } + + if (di->ddump_frames) + VG_(printf)("\n"); + + } else { + + AddressDecodingInfo adi; + UnwindContext ctx, restore_ctx; + Int cie; + ULong look_for; + Bool ok; + Addr fde_initloc; + UWord fde_arange; + UChar* fde_instrs; + Int fde_ilen; + + /* --------- FDE --------- */ + + /* Find the relevant CIE. The CIE we want is located + cie_pointer bytes back from here. */ + + /* re sizeof(UInt) / sizeof(ULong), matches XXX above. */ + look_for = (data - (dw64 ? 
sizeof(ULong) : sizeof(UInt)) + - ehframe_image) + - cie_pointer; + + for (cie = 0; cie < n_CIEs; cie++) { + if (0) VG_(printf)("look for %lld %lld\n", + look_for, the_CIEs[cie].offset ); + if (the_CIEs[cie].offset == look_for) + break; + } + vg_assert(cie >= 0 && cie <= n_CIEs); + if (cie == n_CIEs) { + how = "FDE refers to not-findable CIE"; + goto bad; + } + + adi.encoding = the_CIEs[cie].address_encoding; + adi.ehframe_image = ehframe_image; + adi.ehframe_avma = di->ehframe_avma; + adi.text_bias = di->text_debug_bias; + fde_initloc = read_encoded_Addr(&nbytes, &adi, data); + data += nbytes; + if (di->trace_cfi) + VG_(printf)("fde.initloc = %#lx\n", fde_initloc); + + adi.encoding = the_CIEs[cie].address_encoding & 0xf; + adi.ehframe_image = ehframe_image; + adi.ehframe_avma = di->ehframe_avma; + adi.text_bias = di->text_debug_bias; + + /* WAS (incorrectly): + fde_arange = read_encoded_Addr(&nbytes, &adi, data); + data += nbytes; + The following corresponds to what binutils/dwarf.c does: + */ + { UInt ptr_size = size_of_encoded_Addr( adi.encoding ); + switch (ptr_size) { + case 8: case 4: case 2: case 1: + fde_arange + = (UWord)read_le_u_encoded_literal(data, ptr_size); + data += ptr_size; + break; + default: + how = "unknown arange field encoding in FDE"; + goto bad; + } + } + + if (di->trace_cfi) + VG_(printf)("fde.arangec = %#lx\n", fde_arange); + + if (di->ddump_frames) + VG_(printf)("%08lx %08lx %08lx FDE cie=%08lx pc=%08lx..%08lx\n", + ((Addr)ciefde_start) - ((Addr)ehframe_image), + (Addr)ciefde_len, + (Addr)(UWord)cie_pointer, + (Addr)look_for, + ((Addr)fde_initloc) - di->text_debug_bias, + ((Addr)fde_initloc) - di->text_debug_bias + fde_arange); + + if (the_CIEs[cie].saw_z_augmentation) { + UInt length = read_leb128( data, &nbytes, 0); + data += nbytes; + if (di->ddump_frames && (length > 0)) { + UInt i; + VG_(printf)(" Augmentation data: "); + for (i = 0; i < length; i++) + VG_(printf)(" %02x", (UInt)data[i]); + VG_(printf)("\n\n"); + } + data += length; 
+ } + + fde_instrs = data; + fde_ilen = ciefde_start + ciefde_len + sizeof(UInt) - data; + if (di->trace_cfi) { + VG_(printf)("fde.instrs = %p\n", fde_instrs); + VG_(printf)("fde.ilen = %d\n", (Int)fde_ilen); + } + + if (fde_ilen < 0 || fde_ilen > di->ehframe_size) { + how = "implausible # fde insns"; + goto bad; + } + + data += fde_ilen; + + adi.encoding = the_CIEs[cie].address_encoding; + adi.ehframe_image = ehframe_image; + adi.ehframe_avma = di->ehframe_avma; + adi.text_bias = di->text_debug_bias; + + if (di->trace_cfi) + show_CF_instructions( fde_instrs, fde_ilen, &adi, + the_CIEs[cie].code_a_f, + the_CIEs[cie].data_a_f ); + + initUnwindContext(&ctx); + ctx.code_a_f = the_CIEs[cie].code_a_f; + ctx.data_a_f = the_CIEs[cie].data_a_f; + ctx.initloc = fde_initloc; + ctx.ra_reg = the_CIEs[cie].ra_reg; + ctx.exprs = VG_(newXA)( ML_(dinfo_zalloc), "di.rcid.1", + ML_(dinfo_free), + sizeof(CfiExpr) ); + vg_assert(ctx.exprs); + + /* Run the CIE's instructions. Ugly hack: if + --debug-dump=frames is in effect, suppress output for + these instructions since they will already have been shown + at the time the CIE was first encountered. Note, not + thread safe - if this reader is ever made threaded, should + fix properly. */ + { Bool hack = di->ddump_frames; + di->ddump_frames = False; + initUnwindContext(&restore_ctx); + ok = run_CF_instructions( + di, False, &ctx, the_CIEs[cie].instrs, + the_CIEs[cie].ilen, 0, NULL, &adi + ); + di->ddump_frames = hack; + } + /* And now run the instructions for the FDE, starting from + the state created by running the CIE preamble + instructions. 
*/ + if (ok) { + restore_ctx = ctx; + ok = run_CF_instructions( + di, True, &ctx, fde_instrs, fde_ilen, fde_arange, + &restore_ctx, &adi + ); + if (di->ddump_frames) + VG_(printf)("\n"); + } + + VG_(deleteXA)( ctx.exprs ); + } + } + + return; + + bad: + if (!VG_(clo_xml) && VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Warning: %s in DWARF2 CFI reading", how); + return; +} + + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/readdwarf3.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readdwarf3.c.svn-base new file mode 100644 index 0000000..d8eed6a --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readdwarf3.c.svn-base @@ -0,0 +1,3908 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read DWARF3 ".debug_info" sections (DIE trees). ---*/ +/*--- readdwarf3.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+ + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +/* REFERENCE (without which this code will not make much sense): + + DWARF Debugging Information Format, Version 3, + dated 20 December 2005 (the "D3 spec"). + + Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a + .doc (MS Word) version, but for some reason the section numbers + between the Word and PDF versions differ by 1 in the first digit. + All section references in this code are to the PDF version. + + CURRENT HACKS: + + DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is + assumed to mean "const void" or "volatile void" respectively. + GDB appears to interpret them like this, anyway. + + In many cases it is important to know the svma of a CU (the "base + address of the CU", as the D3 spec calls it). There are some + situations in which the spec implies this value is unknown, but the + Dwarf3 produced by gcc-4.1 seems to assume is not unknown but + merely zero when not explicitly stated. So we too have to make + that assumption. + + POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't + unitary_range_list() bias the resulting range list in the same way + that its more general cousin, get_range_list(), does? I don't + know. + + TODO, 2008 Feb 17: + + get rid of cu_svma_known and document the assumed-zero svma hack. + + ML_(sizeOfType): differentiate between zero sized types and types + for which the size is unknown. Is this important? I don't know. + + DW_AT_array_types: deal with explicit sizes (currently we compute + the size from the bounds and the element size, although that's + fragile, if the bounds incompletely specified, or completely + absent) + + Document reason for difference (by 1) of stack preening depth in + parse_var_DIE vs parse_type_DIE. 
+ + Don't hand to ML_(addVars), vars whose locations are entirely in + registers (DW_OP_reg*). This is merely a space-saving + optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these + expressions correctly, by failing to evaluate them and hence + effectively ignoring the variable with which they are associated. + + Deal with DW_AT_array_types which have element size != stride + + In some cases, the info for a variable is split between two + different DIEs (generally a declarer and a definer). We punt on + these. Could do better here. + + The 'data_bias' argument passed to the expression evaluator + (ML_(evaluate_Dwarf3_Expr)) should really be changed to a + MaybeUWord, to make it clear when we do vs don't know what it is + for the evaluation of an expression. At the moment zero is passed + for this parameter in the don't know case. That's a bit fragile + and obscure; using a MaybeUWord would be clearer. + + POTENTIAL PERFORMANCE IMPROVEMENTS: + + Currently, duplicate removal and all other queries for the type + entities array is done using cuOffset-based pointing, which + involves a binary search (VG_(lookupXA)) for each access. This is + wildly inefficient, although simple. It would be better to + translate all the cuOffset-based references (iow, all the "R" and + "Rs" fields in the TyEnts in 'tyents') to direct index numbers in + 'tyents' right at the start of dedup_types(), and use direct + indexing (VG_(indexXA)) wherever possible after that. + + cmp__XArrays_of_AddrRange is also a performance bottleneck. Move + VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use + points, and possibly also make an _UNCHECKED version which skips + the range checks in performance-critical situations such as this. + + Handle interaction between read_DIE and parse_{var,type}_DIE + better. 
Currently read_DIE reads the entire DIE just to find where + the end is (and for debug printing), so that it can later reliably + move the cursor to the end regardless of what parse_{var,type}_DIE + do. This means many DIEs (most, even?) are read twice. It would + be smarter to make parse_{var,type}_DIE return a Bool indicating + whether or not they advanced the DIE cursor, and only if they + didn't should read_DIE itself read through the DIE. + + ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have + zero variables in their .vars XArray. Rather than have an XArray + with zero elements (which uses 2 malloc'd blocks), allow the .vars + pointer to be NULL in this case. + + More generally, reduce the amount of memory allocated and freed + while reading Dwarf3 type/variable information. Even modest (20MB) + objects cause this module to allocate and free hundreds of + thousands of small blocks, and ML_(arena_malloc) and its various + groupies always show up at the top of performance profiles. */ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_options.h" +#include "pub_core_xarray.h" +#include "pub_core_wordfm.h" +#include "priv_misc.h" /* dinfo_zalloc/free */ +#include "priv_tytypes.h" +#include "priv_d3basics.h" +#include "priv_storage.h" +#include "priv_readdwarf3.h" /* self */ + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Basic machinery for parsing DIEs. ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +#define TRACE_D3(format, args...) 
\ + if (td3) { VG_(printf)(format, ## args); } + +#define D3_INVALID_CUOFF ((UWord)(-1UL)) +#define D3_FAKEVOID_CUOFF ((UWord)(-2UL)) + +typedef + struct { + UChar* region_start_img; + UWord region_szB; + UWord region_next; + void (*barf)( HChar* ) __attribute__((noreturn)); + HChar* barfstr; + } + Cursor; + +static inline Bool is_sane_Cursor ( Cursor* c ) { + if (!c) return False; + if (!c->barf) return False; + if (!c->barfstr) return False; + return True; +} + +static void init_Cursor ( Cursor* c, + UChar* region_start_img, + UWord region_szB, + UWord region_next, + __attribute__((noreturn)) void (*barf)( HChar* ), + HChar* barfstr ) +{ + vg_assert(c); + VG_(memset)(c, 0, sizeof(*c)); + c->region_start_img = region_start_img; + c->region_szB = region_szB; + c->region_next = region_next; + c->barf = barf; + c->barfstr = barfstr; + vg_assert(is_sane_Cursor(c)); +} + +static Bool is_at_end_Cursor ( Cursor* c ) { + vg_assert(is_sane_Cursor(c)); + return c->region_next >= c->region_szB; +} + +static inline UWord get_position_of_Cursor ( Cursor* c ) { + vg_assert(is_sane_Cursor(c)); + return c->region_next; +} +static inline void set_position_of_Cursor ( Cursor* c, UWord pos ) { + c->region_next = pos; + vg_assert(is_sane_Cursor(c)); +} + +static /*signed*/Word get_remaining_length_Cursor ( Cursor* c ) { + vg_assert(is_sane_Cursor(c)); + return c->region_szB - c->region_next; +} + +static UChar* get_address_of_Cursor ( Cursor* c ) { + vg_assert(is_sane_Cursor(c)); + return &c->region_start_img[ c->region_next ]; +} + +__attribute__((noreturn)) +static void failWith ( Cursor* c, HChar* str ) { + vg_assert(c); + vg_assert(c->barf); + c->barf(str); + /*NOTREACHED*/ + vg_assert(0); +} + +/* FIXME: document assumptions on endianness for + get_UShort/UInt/ULong. 
*/ +static inline UChar get_UChar ( Cursor* c ) { + UChar r; + /* vg_assert(is_sane_Cursor(c)); */ + if (c->region_next + sizeof(UChar) > c->region_szB) { + c->barf(c->barfstr); + /*NOTREACHED*/ + vg_assert(0); + } + r = * (UChar*) &c->region_start_img[ c->region_next ]; + c->region_next += sizeof(UChar); + return r; +} +static UShort get_UShort ( Cursor* c ) { + UShort r; + vg_assert(is_sane_Cursor(c)); + if (c->region_next + sizeof(UShort) > c->region_szB) { + c->barf(c->barfstr); + /*NOTREACHED*/ + vg_assert(0); + } + r = * (UShort*) &c->region_start_img[ c->region_next ]; + c->region_next += sizeof(UShort); + return r; +} +static UInt get_UInt ( Cursor* c ) { + UInt r; + vg_assert(is_sane_Cursor(c)); + if (c->region_next + sizeof(UInt) > c->region_szB) { + c->barf(c->barfstr); + /*NOTREACHED*/ + vg_assert(0); + } + r = * (UInt*) &c->region_start_img[ c->region_next ]; + c->region_next += sizeof(UInt); + return r; +} +static ULong get_ULong ( Cursor* c ) { + ULong r; + vg_assert(is_sane_Cursor(c)); + if (c->region_next + sizeof(ULong) > c->region_szB) { + c->barf(c->barfstr); + /*NOTREACHED*/ + vg_assert(0); + } + r = * (ULong*) &c->region_start_img[ c->region_next ]; + c->region_next += sizeof(ULong); + return r; +} +static inline ULong get_ULEB128 ( Cursor* c ) { + ULong result; + Int shift; + UChar byte; + /* unroll first iteration */ + byte = get_UChar( c ); + result = (ULong)(byte & 0x7f); + if (LIKELY(!(byte & 0x80))) return result; + shift = 7; + /* end unroll first iteration */ + do { + byte = get_UChar( c ); + result |= ((ULong)(byte & 0x7f)) << shift; + shift += 7; + } while (byte & 0x80); + return result; +} +static Long get_SLEB128 ( Cursor* c ) { + ULong result = 0; + Int shift = 0; + UChar byte; + do { + byte = get_UChar(c); + result |= ((ULong)(byte & 0x7f)) << shift; + shift += 7; + } while (byte & 0x80); + if (shift < 64 && (byte & 0x40)) + result |= -(1ULL << shift); + return result; +} + +/* Assume 'c' points to the start of a string. 
Return the absolute + address of whatever it points at, and advance it past the + terminating zero. This makes it safe for the caller to then copy + the string with ML_(addStr), since (w.r.t. image overruns) the + process of advancing past the terminating zero will already have + "vetted" the string. */ +static UChar* get_AsciiZ ( Cursor* c ) { + UChar uc; + UChar* res = get_address_of_Cursor(c); + do { uc = get_UChar(c); } while (uc != 0); + return res; +} + +static ULong peek_ULEB128 ( Cursor* c ) { + Word here = c->region_next; + ULong r = get_ULEB128( c ); + c->region_next = here; + return r; +} +static UChar peek_UChar ( Cursor* c ) { + Word here = c->region_next; + UChar r = get_UChar( c ); + c->region_next = here; + return r; +} + +static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) { + return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c); +} + +static UWord get_UWord ( Cursor* c ) { + vg_assert(sizeof(UWord) == sizeof(void*)); + if (sizeof(UWord) == 4) return get_UInt(c); + if (sizeof(UWord) == 8) return get_ULong(c); + vg_assert(0); +} + +/* Read a DWARF3 'Initial Length' field */ +static ULong get_Initial_Length ( /*OUT*/Bool* is64, + Cursor* c, + HChar* barfMsg ) +{ + ULong w64; + UInt w32; + *is64 = False; + w32 = get_UInt( c ); + if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) { + c->barf( barfMsg ); + } + else if (w32 == 0xFFFFFFFF) { + *is64 = True; + w64 = get_ULong( c ); + } else { + *is64 = False; + w64 = (ULong)w32; + } + return w64; +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- "CUConst" structure ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +#define N_ABBV_CACHE 32 + +/* Holds information that is constant through the parsing of a + Compilation Unit. This is basically plumbed through to + everywhere. */ +typedef + struct { + /* Call here if anything goes wrong */ + void (*barf)( HChar* ) __attribute__((noreturn)); + /* Is this 64-bit DWARF ? 
*/ + Bool is_dw64; + /* Which DWARF version ? (2 or 3) */ + UShort version; + /* Length of this Compilation Unit, as stated in the + .unit_length :: InitialLength field of the CU Header. + However, this size (as specified by the D3 spec) does not + include the size of the .unit_length field itself, which is + either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value + can be obtained through the expression ".is_dw64 ? 12 : 4". */ + ULong unit_length; + /* Offset of start of this unit in .debug_info */ + UWord cu_start_offset; + /* SVMA for this CU. In the D3 spec, is known as the "base + address of the compilation unit (last para sec 3.1.1). + Needed for (amongst things) interpretation of location-list + values. */ + Addr cu_svma; + Bool cu_svma_known; + /* The debug_abbreviations table to be used for this Unit */ + UChar* debug_abbv; + /* Upper bound on size thereof (an overestimate, in general) */ + UWord debug_abbv_maxszB; + /* Where is .debug_str ? */ + UChar* debug_str_img; + UWord debug_str_sz; + /* Where is .debug_ranges ? */ + UChar* debug_ranges_img; + UWord debug_ranges_sz; + /* Where is .debug_loc ? */ + UChar* debug_loc_img; + UWord debug_loc_sz; + /* Where is .debug_line? */ + UChar* debug_line_img; + UWord debug_line_sz; + /* Where is .debug_info? */ + UChar* debug_info_img; + UWord debug_info_sz; + /* --- Needed so we can add stuff to the string table. --- */ + struct _DebugInfo* di; + /* --- a cache for set_abbv_Cursor --- */ + /* abbv_code == (ULong)-1 for an unused entry. */ + struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE]; + UWord saC_cache_queries; + UWord saC_cache_misses; + } + CUConst; + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Helper functions for Guarded Expressions ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* Parse the location list starting at img-offset 'debug_loc_offset' + in .debug_loc. 
Results are biased with 'svma_of_referencing_CU' + and so I believe are correct SVMAs for the object as a whole. This + function allocates the UChar*, and the caller must deallocate it. + The resulting block is in so-called Guarded-Expression format. + + Guarded-Expression format is similar but not identical to the DWARF3 + location-list format. The format of each returned block is: + + UChar biasMe; + UChar isEnd; + followed by zero or more of + + (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd) + + '..bytes..' is an standard DWARF3 location expression which is + valid when aMin <= pc <= aMax (possibly after suitable biasing). + + The number of bytes in '..bytes..' is nbytes. + + The end of the sequence is marked by an isEnd == 1 value. All + previous isEnd values must be zero. + + biasMe is 1 if the aMin/aMax fields need this DebugInfo's + text_bias added before use, and 0 if the GX is this is not + necessary (is ready to go). + + Hence the block can be quickly parsed and is self-describing. Note + that aMax is 1 less than the corresponding value in a DWARF3 + location list. Zero length ranges, with aMax == aMin-1, are not + allowed. +*/ +/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where + it more logically belongs. */ + + +/* Apply a text bias to a GX. 
*/ +static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di ) +{ + UShort nbytes; + Addr* pA; + UChar* p = &gx->payload[0]; + UChar uc; + uc = *p++; /*biasMe*/ + if (uc == 0) + return; + vg_assert(uc == 1); + p[-1] = 0; /* mark it as done */ + while (True) { + uc = *p++; + if (uc == 1) + break; /*isEnd*/ + vg_assert(uc == 0); + /* t-bias aMin */ + pA = (Addr*)p; + *pA += di->text_debug_bias; + p += sizeof(Addr); + /* t-bias aMax */ + pA = (Addr*)p; + *pA += di->text_debug_bias; + p += sizeof(Addr); + /* nbytes, and actual expression */ + nbytes = * (UShort*)p; p += sizeof(UShort); + p += nbytes; + } +} + +__attribute__((noinline)) +static GExpr* make_singleton_GX ( UChar* block, UWord nbytes ) +{ + SizeT bytesReqd; + GExpr* gx; + UChar *p, *pstart; + + vg_assert(sizeof(UWord) == sizeof(Addr)); + vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */ + bytesReqd + = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/ + + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/ + + sizeof(UShort) /*nbytes*/ + nbytes + + sizeof(UChar); /*isEnd*/ + + gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", + sizeof(GExpr) + bytesReqd ); + vg_assert(gx); + + p = pstart = &gx->payload[0]; + + * ((UChar*)p) = 0; /*biasMe*/ p += sizeof(UChar); + * ((UChar*)p) = 0; /*!isEnd*/ p += sizeof(UChar); + * ((Addr*)p) = 0; /*aMin*/ p += sizeof(Addr); + * ((Addr*)p) = ~((Addr)0); /*aMax */ p += sizeof(Addr); + * ((UShort*)p) = (UShort)nbytes; /*nbytes*/ p += sizeof(UShort); + VG_(memcpy)(p, block, nbytes); p += nbytes; + * ((UChar*)p) = 1; /*isEnd*/ p += sizeof(UChar); + + vg_assert( (SizeT)(p - pstart) == bytesReqd); + vg_assert( &gx->payload[bytesReqd] + == ((UChar*)gx) + sizeof(GExpr) + bytesReqd ); + + return gx; +} + +__attribute__((noinline)) +static GExpr* make_general_GX ( CUConst* cc, + Bool td3, + UWord debug_loc_offset, + Addr svma_of_referencing_CU ) +{ + Addr base; + Cursor loc; + XArray* xa; /* XArray of UChar */ + GExpr* gx; + Word nbytes; + + 
vg_assert(sizeof(UWord) == sizeof(Addr)); + if (cc->debug_loc_sz == 0) + cc->barf("make_general_GX: .debug_loc is empty/missing"); + + init_Cursor( &loc, cc->debug_loc_img, + cc->debug_loc_sz, 0, cc->barf, + "Overrun whilst reading .debug_loc section(2)" ); + set_position_of_Cursor( &loc, debug_loc_offset ); + + TRACE_D3("make_general_GX (.debug_loc_offset = %lu, img = %p) {\n", + debug_loc_offset, get_address_of_Cursor( &loc ) ); + + /* Who frees this xa? It is freed before this fn exits. */ + xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", + ML_(dinfo_free), + sizeof(UChar) ); + + { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } + + base = 0; + while (True) { + Bool acquire; + UWord len; + /* Read a (host-)word pair. This is something of a hack since + the word size to read is really dictated by the ELF file; + however, we assume we're reading a file with the same + word-sizeness as the host. Reasonably enough. */ + UWord w1 = get_UWord( &loc ); + UWord w2 = get_UWord( &loc ); + + TRACE_D3(" %08lx %08lx\n", w1, w2); + if (w1 == 0 && w2 == 0) + break; /* end of list */ + + if (w1 == -1UL) { + /* new value for 'base' */ + base = w2; + continue; + } + + /* else a location expression follows */ + /* else enumerate [w1+base, w2+base) */ + /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" + (sec 2.17.2) */ + if (w1 > w2) { + TRACE_D3("negative range is for .debug_loc expr at " + "file offset %lu\n", + debug_loc_offset); + cc->barf( "negative range in .debug_loc section" ); + } + + /* ignore zero length ranges */ + acquire = w1 < w2; + len = (UWord)get_UShort( &loc ); + + if (acquire) { + UWord w; + UShort s; + UChar c; + c = 0; /* !isEnd*/ + VG_(addBytesToXA)( xa, &c, sizeof(c) ); + w = w1 + base + svma_of_referencing_CU; + VG_(addBytesToXA)( xa, &w, sizeof(w) ); + w = w2 -1 + base + svma_of_referencing_CU; + VG_(addBytesToXA)( xa, &w, sizeof(w) ); + s = (UShort)len; + VG_(addBytesToXA)( xa, &s, sizeof(s) ); + } + + while 
(len > 0) { + UChar byte = get_UChar( &loc ); + TRACE_D3("%02x", (UInt)byte); + if (acquire) + VG_(addBytesToXA)( xa, &byte, 1 ); + len--; + } + TRACE_D3("\n"); + } + + { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); } + + nbytes = VG_(sizeXA)( xa ); + vg_assert(nbytes >= 1); + + gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes ); + vg_assert(gx); + VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes ); + vg_assert( &gx->payload[nbytes] + == ((UChar*)gx) + sizeof(GExpr) + nbytes ); + + VG_(deleteXA)( xa ); + + TRACE_D3("}\n"); + + return gx; +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Helper functions for range lists and CU headers ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* Denotes an address range. Both aMin and aMax are included in the + range; hence a complete range is (0, ~0) and an empty range is any + (X, X-1) for X > 0.*/ +typedef + struct { Addr aMin; Addr aMax; } + AddrRange; + + +/* Generate an arbitrary structural total ordering on + XArray* of AddrRange. */ +static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 ) +{ + Word n1, n2, i; + tl_assert(rngs1 && rngs2); + n1 = VG_(sizeXA)( rngs1 ); + n2 = VG_(sizeXA)( rngs2 ); + if (n1 < n2) return -1; + if (n1 > n2) return 1; + for (i = 0; i < n1; i++) { + AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i ); + AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i ); + if (rng1->aMin < rng2->aMin) return -1; + if (rng1->aMin > rng2->aMin) return 1; + if (rng1->aMax < rng2->aMax) return -1; + if (rng1->aMax > rng2->aMax) return 1; + } + return 0; +} + + +__attribute__((noinline)) +static XArray* /* of AddrRange */ empty_range_list ( void ) +{ + XArray* xa; /* XArray of AddrRange */ + /* Who frees this xa? varstack_preen() does. 
*/ + xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1", + ML_(dinfo_free), + sizeof(AddrRange) ); + return xa; +} + + +__attribute__((noinline)) +static XArray* unitary_range_list ( Addr aMin, Addr aMax ) +{ + XArray* xa; + AddrRange pair; + vg_assert(aMin <= aMax); + /* Who frees this xa? varstack_preen() does. */ + xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1", + ML_(dinfo_free), + sizeof(AddrRange) ); + pair.aMin = aMin; + pair.aMax = aMax; + VG_(addToXA)( xa, &pair ); + return xa; +} + + +/* Enumerate the address ranges starting at img-offset + 'debug_ranges_offset' in .debug_ranges. Results are biased with + 'svma_of_referencing_CU' and so I believe are correct SVMAs for the + object as a whole. This function allocates the XArray, and the + caller must deallocate it. */ +__attribute__((noinline)) +static XArray* /* of AddrRange */ + get_range_list ( CUConst* cc, + Bool td3, + UWord debug_ranges_offset, + Addr svma_of_referencing_CU ) +{ + Addr base; + Cursor ranges; + XArray* xa; /* XArray of AddrRange */ + AddrRange pair; + + if (cc->debug_ranges_sz == 0) + cc->barf("get_range_list: .debug_ranges is empty/missing"); + + init_Cursor( &ranges, cc->debug_ranges_img, + cc->debug_ranges_sz, 0, cc->barf, + "Overrun whilst reading .debug_ranges section(2)" ); + set_position_of_Cursor( &ranges, debug_ranges_offset ); + + /* Who frees this xa? varstack_preen() does. */ + xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free), + sizeof(AddrRange) ); + base = 0; + while (True) { + /* Read a (host-)word pair. This is something of a hack since + the word size to read is really dictated by the ELF file; + however, we assume we're reading a file with the same + word-sizeness as the host. Reasonably enough. */ + UWord w1 = get_UWord( &ranges ); + UWord w2 = get_UWord( &ranges ); + + if (w1 == 0 && w2 == 0) + break; /* end of list. 
*/ + + if (w1 == -1UL) { + /* new value for 'base' */ + base = w2; + continue; + } + + /* else enumerate [w1+base, w2+base) */ + /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc" + (sec 2.17.2) */ + if (w1 > w2) + cc->barf( "negative range in .debug_ranges section" ); + if (w1 < w2) { + pair.aMin = w1 + base + svma_of_referencing_CU; + pair.aMax = w2 - 1 + base + svma_of_referencing_CU; + vg_assert(pair.aMin <= pair.aMax); + VG_(addToXA)( xa, &pair ); + } + } + return xa; +} + + +/* Parse the Compilation Unit header indicated at 'c' and + initialise 'cc' accordingly. */ +static __attribute__((noinline)) +void parse_CU_Header ( /*OUT*/CUConst* cc, + Bool td3, + Cursor* c, + UChar* debug_abbv_img, UWord debug_abbv_sz ) +{ + UChar address_size; + UWord debug_abbrev_offset; + Int i; + + VG_(memset)(cc, 0, sizeof(*cc)); + vg_assert(c && c->barf); + cc->barf = c->barf; + + /* initial_length field */ + cc->unit_length + = get_Initial_Length( &cc->is_dw64, c, + "parse_CU_Header: invalid initial-length field" ); + + TRACE_D3(" Length: %lld\n", cc->unit_length ); + + /* version */ + cc->version = get_UShort( c ); + if (cc->version != 2 && cc->version != 3) + cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3" ); + TRACE_D3(" Version: %d\n", (Int)cc->version ); + + /* debug_abbrev_offset */ + debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 ); + if (debug_abbrev_offset >= debug_abbv_sz) + cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" ); + TRACE_D3(" Abbrev Offset: %ld\n", debug_abbrev_offset ); + + /* address size. If this isn't equal to the host word size, just + give up. This makes it safe to assume elsewhere that + DW_FORM_addr and DW_FORM_ref_addr can be treated as a host + word. 
*/ + address_size = get_UChar( c ); + if (address_size != sizeof(void*)) + cc->barf( "parse_CU_Header: invalid address_size" ); + TRACE_D3(" Pointer Size: %d\n", (Int)address_size ); + + /* Set up so that cc->debug_abbv points to the relevant table for + this CU. Set the szB so that at least we can't read off the end + of the debug_abbrev section -- potentially (and quite likely) + too big, if this isn't the last table in the section, but at + least it's safe. */ + cc->debug_abbv = debug_abbv_img + debug_abbrev_offset; + cc->debug_abbv_maxszB = debug_abbv_sz - debug_abbrev_offset; + /* and empty out the set_abbv_Cursor cache */ + if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n"); + for (i = 0; i < N_ABBV_CACHE; i++) { + cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */ + cc->saC_cache[i].posn = 0; + } + cc->saC_cache_queries = 0; + cc->saC_cache_misses = 0; +} + + +/* Set up 'c' so it is ready to parse the abbv table entry code + 'abbv_code' for this compilation unit. */ +static __attribute__((noinline)) +void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3, + CUConst* cc, ULong abbv_code ) +{ + Int i; + ULong acode; + + if (abbv_code == 0) + cc->barf("set_abbv_Cursor: abbv_code == 0" ); + + /* (ULong)-1 is used to represent an empty cache slot. So we can't + allow it. In any case no valid DWARF3 should make a reference + to a negative abbreviation code. [at least, they always seem to + be numbered upwards from zero as far as I have seen] */ + vg_assert(abbv_code != (ULong)-1); + + /* First search the cache. */ + if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n"); + cc->saC_cache_queries++; + for (i = 0; i < N_ABBV_CACHE; i++) { + /* No need to test the cached abbv_codes for -1 (empty), since + we just asserted that abbv_code is not -1. */ + if (cc->saC_cache[i].abbv_code == abbv_code) { + /* Found it. Cool. Set up the parser using the cached + position, and move this cache entry 1 step closer to the + front. 
*/ + if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n"); + init_Cursor( c, cc->debug_abbv, + cc->debug_abbv_maxszB, cc->saC_cache[i].posn, + cc->barf, + "Overrun whilst parsing .debug_abbrev section(1)" ); + if (i > 0) { + ULong t_abbv_code = cc->saC_cache[i].abbv_code; + UWord t_posn = cc->saC_cache[i].posn; + while (i > 0) { + cc->saC_cache[i] = cc->saC_cache[i-1]; + cc->saC_cache[0].abbv_code = t_abbv_code; + cc->saC_cache[0].posn = t_posn; + i--; + } + } + return; + } + } + + /* No. It's not in the cache. We have to search through + .debug_abbrev, of course taking care to update the cache + when done. */ + + cc->saC_cache_misses++; + init_Cursor( c, cc->debug_abbv, cc->debug_abbv_maxszB, 0, cc->barf, + "Overrun whilst parsing .debug_abbrev section(2)" ); + + /* Now iterate though the table until we find the requested + entry. */ + while (True) { + //ULong atag; + //UInt has_children; + acode = get_ULEB128( c ); + if (acode == 0) break; /* end of the table */ + if (acode == abbv_code) break; /* found it */ + /*atag = */ get_ULEB128( c ); + /*has_children = */ get_UChar( c ); + //TRACE_D3(" %llu %s [%s]\n", + // acode, pp_DW_TAG(atag), pp_DW_children(has_children)); + while (True) { + ULong at_name = get_ULEB128( c ); + ULong at_form = get_ULEB128( c ); + if (at_name == 0 && at_form == 0) break; + //TRACE_D3(" %18s %s\n", + // pp_DW_AT(at_name), pp_DW_FORM(at_form)); + } + } + + if (acode == 0) { + /* Not found. This is fatal. */ + cc->barf("set_abbv_Cursor: abbv_code not found"); + } + + /* Otherwise, 'c' is now set correctly to parse the relevant entry, + starting from the abbreviation entry's tag. So just cache + the result, and return. 
*/ + for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) { + cc->saC_cache[i] = cc->saC_cache[i-1]; + } + if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n"); + cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code; + cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c); +} + + +/* From 'c', get the Form data into the lowest 1/2/4/8 bytes of *cts. + + If *cts itself contains the entire result, then *ctsSzB is set to + 1,2,4 or 8 accordingly and *ctsMemSzB is set to zero. + + Alternatively, the result can be a block of data (in the + transiently mapped-in object, so-called "image" space). If so then + the lowest sizeof(void*)/8 bytes of *cts hold a pointer to said + image, *ctsSzB is zero, and *ctsMemSzB is the size of the block. + + Unfortunately this means it is impossible to represent a zero-size + image block since that would have *ctsSzB == 0 and *ctsMemSzB == 0 + and so is ambiguous (which case it is?) + + Invariant on successful return: + (*ctsSzB > 0 && *ctsMemSzB == 0) + || (*ctsSzB == 0 && *ctsMemSzB > 0) +*/ +static +void get_Form_contents ( /*OUT*/ULong* cts, + /*OUT*/Int* ctsSzB, + /*OUT*/UWord* ctsMemSzB, + CUConst* cc, Cursor* c, + Bool td3, DW_FORM form ) +{ + *cts = 0; + *ctsSzB = 0; + *ctsMemSzB = 0; + switch (form) { + case DW_FORM_data1: + *cts = (ULong)(UChar)get_UChar(c); + *ctsSzB = 1; + TRACE_D3("%u", (UInt)*cts); + break; + case DW_FORM_data2: + *cts = (ULong)(UShort)get_UShort(c); + *ctsSzB = 2; + TRACE_D3("%u", (UInt)*cts); + break; + case DW_FORM_data4: + *cts = (ULong)(UInt)get_UInt(c); + *ctsSzB = 4; + TRACE_D3("%u", (UInt)*cts); + break; + case DW_FORM_data8: + *cts = get_ULong(c); + *ctsSzB = 8; + TRACE_D3("%llu", *cts); + break; + case DW_FORM_sdata: + *cts = (ULong)(Long)get_SLEB128(c); + *ctsSzB = 8; + TRACE_D3("%lld", (Long)*cts); + break; + case DW_FORM_addr: + /* note, this is a hack. DW_FORM_addr is defined as getting + a word the size of the target machine as defined by the + address_size field in the CU Header. 
However, + parse_CU_Header() rejects all inputs except those for + which address_size == sizeof(Word), hence we can just + treat it as a (host) Word. */ + *cts = (ULong)(UWord)get_UWord(c); + *ctsSzB = sizeof(UWord); + TRACE_D3("0x%lx", (UWord)*cts); + break; + + case DW_FORM_ref_addr: + /* We make the same word-size assumption as DW_FORM_addr. */ + /* What does this really mean? From D3 Sec 7.5.4, + description of "reference", it would appear to reference + some other DIE, by specifying the offset from the + beginning of a .debug_info section. The D3 spec mentions + that this might be in some other shared object and + executable. But I don't see how the name of the other + object/exe is specified. + + At least for the DW_FORM_ref_addrs created by icc11, the + references seem to be within the same object/executable. + So for the moment we merely range-check, to see that they + actually do specify a plausible offset within this + object's .debug_info, and return the value unchanged. + */ + *cts = (ULong)(UWord)get_UWord(c); + *ctsSzB = sizeof(UWord); + TRACE_D3("0x%lx", (UWord)*cts); + if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)*cts); + if (/* the following 2 are surely impossible, but ... */ + cc->debug_info_img == NULL || cc->debug_info_sz == 0 + || *cts >= (ULong)cc->debug_info_sz) { + /* Hmm. Offset is nonsensical for this object's .debug_info + section. Be safe and reject it. */ + cc->barf("get_Form_contents: DW_FORM_ref_addr points " + "outside .debug_info"); + } + break; + + case DW_FORM_strp: { + /* this is an offset into .debug_str */ + UChar* str; + UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 ); + if (cc->debug_str_img == NULL || uw >= cc->debug_str_sz) + cc->barf("get_Form_contents: DW_FORM_strp " + "points outside .debug_str"); + /* FIXME: check the entire string lies inside debug_str, + not just the first byte of it. 
*/ + str = (UChar*)cc->debug_str_img + uw; + TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, str); + *cts = (ULong)(UWord)str; + *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); + break; + } + case DW_FORM_string: { + UChar* str = get_AsciiZ(c); + TRACE_D3("%s", str); + *cts = (ULong)(UWord)str; + /* strlen is safe because get_AsciiZ already 'vetted' the + entire string */ + *ctsMemSzB = 1 + (ULong)VG_(strlen)(str); + break; + } + case DW_FORM_ref4: { + UInt u32 = get_UInt(c); + UWord res = cc->cu_start_offset + (UWord)u32; + *cts = (ULong)res; + *ctsSzB = sizeof(UWord); + TRACE_D3("<%lx>", res); + break; + } + case DW_FORM_flag: { + UChar u8 = get_UChar(c); + TRACE_D3("%u", (UInt)u8); + *cts = (ULong)u8; + *ctsSzB = 1; + break; + } + case DW_FORM_block1: { + ULong u64b; + ULong u64 = (ULong)get_UChar(c); + UChar* block = get_address_of_Cursor(c); + TRACE_D3("%llu byte block: ", u64); + for (u64b = u64; u64b > 0; u64b--) { + UChar u8 = get_UChar(c); + TRACE_D3("%x ", (UInt)u8); + } + *cts = (ULong)(UWord)block; + *ctsMemSzB = (UWord)u64; + break; + } + case DW_FORM_block2: { + ULong u64b; + ULong u64 = (ULong)get_UShort(c); + UChar* block = get_address_of_Cursor(c); + TRACE_D3("%llu byte block: ", u64); + for (u64b = u64; u64b > 0; u64b--) { + UChar u8 = get_UChar(c); + TRACE_D3("%x ", (UInt)u8); + } + *cts = (ULong)(UWord)block; + *ctsMemSzB = (UWord)u64; + break; + } + default: + VG_(printf)( + "get_Form_contents: unhandled %d (%s) at <%lx>\n", + form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c)); + c->barf("get_Form_contents: unhandled DW_FORM"); + } +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Parsing of variable-related DIEs ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +typedef + struct _TempVar { + UChar* name; /* in DebugInfo's .strchunks */ + /* Represent ranges economically. nRanges is the number of + ranges. 
Cases: + 0: .rngOneMin .rngOneMax .manyRanges are all zero + 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL + 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges. + This is merely an optimisation to avoid having to allocate + and free the XArray in the common (98%) of cases where there + is zero or one address ranges. */ + UWord nRanges; + Addr rngOneMin; + Addr rngOneMax; + XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */ + /* Do not free .rngMany, since many TempVars will have the same + value. Instead the associated storage is to be freed by + deleting 'rangetree', which stores a single copy of each + range. */ + /* --- */ + Int level; + UWord typeR; /* a cuOff */ + GExpr* gexpr; /* for this variable */ + GExpr* fbGX; /* to find the frame base of the enclosing fn, if + any */ + UChar* fName; /* declaring file name, or NULL */ + Int fLine; /* declaring file line number, or zero */ + /* offset in .debug_info, so that abstract instances can be + found to satisfy references from concrete instances. */ + UWord dioff; + UWord absOri; /* so the absOri fields refer to dioff fields + in some other, related TempVar. */ + } + TempVar; + +#define N_D3_VAR_STACK 48 + +typedef + struct { + /* Contains the range stack: a stack of address ranges, one + stack entry for each nested scope. + + Some scope entries are created by function definitions + (DW_AT_subprogram), and for those, we also note the GExpr + derived from its DW_AT_frame_base attribute, if any. + Consequently it should be possible to find, for any + variable's DIE, the GExpr for the the containing function's + DW_AT_frame_base by scanning back through the stack to find + the nearest entry associated with a function. This somewhat + elaborate scheme is provided so as to make it possible to + obtain the correct DW_AT_frame_base expression even in the + presence of nested functions (or to be more precise, in the + presence of nested DW_AT_subprogram DIEs). 
+ */ + Int sp; /* [sp] is innermost active entry; sp==-1 for empty + stack */ + XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */ + Int level[N_D3_VAR_STACK]; /* D3 DIE levels */ + Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */ + GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB + expr, else NULL */ + /* The file name table. Is a mapping from integer index to the + (permanent) copy of the string, iow a non-img area. */ + XArray* /* of UChar* */ filenameTable; + } + D3VarParser; + +static void varstack_show ( D3VarParser* parser, HChar* str ) { + Word i, j; + VG_(printf)(" varstack (%s) {\n", str); + for (i = 0; i <= parser->sp; i++) { + XArray* xa = parser->ranges[i]; + vg_assert(xa); + VG_(printf)(" [%ld] (level %d)", i, parser->level[i]); + if (parser->isFunc[i]) { + VG_(printf)(" (fbGX=%p)", parser->fbGX[i]); + } else { + vg_assert(parser->fbGX[i] == NULL); + } + VG_(printf)(": "); + if (VG_(sizeXA)( xa ) == 0) { + VG_(printf)("** empty PC range array **"); + } else { + for (j = 0; j < VG_(sizeXA)( xa ); j++) { + AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j ); + vg_assert(range); + VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax); + } + } + VG_(printf)("\n"); + } + VG_(printf)(" }\n"); +} + +/* Remove from the stack, all entries with .level > 'level' */ +static +void varstack_preen ( D3VarParser* parser, Bool td3, Int level ) +{ + Bool changed = False; + vg_assert(parser->sp < N_D3_VAR_STACK); + while (True) { + vg_assert(parser->sp >= -1); + if (parser->sp == -1) break; + if (parser->level[parser->sp] <= level) break; + if (0) + TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1); + vg_assert(parser->ranges[parser->sp]); + /* Who allocated this xa? get_range_list() or + unitary_range_list(). 
*/ + VG_(deleteXA)( parser->ranges[parser->sp] ); + parser->ranges[parser->sp] = NULL; + parser->level[parser->sp] = 0; + parser->isFunc[parser->sp] = False; + parser->fbGX[parser->sp] = NULL; + parser->sp--; + changed = True; + } + if (changed && td3) + varstack_show( parser, "after preen" ); +} + +static void varstack_push ( CUConst* cc, + D3VarParser* parser, + Bool td3, + XArray* ranges, Int level, + Bool isFunc, GExpr* fbGX ) { + if (0) + TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n", + parser->sp+1, level, ranges); + + /* First we need to zap everything >= 'level', as we are about to + replace any previous entry at 'level', so .. */ + varstack_preen(parser, /*td3*/False, level-1); + + vg_assert(parser->sp >= -1); + vg_assert(parser->sp < N_D3_VAR_STACK); + if (parser->sp == N_D3_VAR_STACK-1) + cc->barf("varstack_push: N_D3_VAR_STACK is too low; " + "increase and recompile"); + if (parser->sp >= 0) + vg_assert(parser->level[parser->sp] < level); + parser->sp++; + vg_assert(parser->ranges[parser->sp] == NULL); + vg_assert(parser->level[parser->sp] == 0); + vg_assert(parser->isFunc[parser->sp] == False); + vg_assert(parser->fbGX[parser->sp] == NULL); + vg_assert(ranges != NULL); + if (!isFunc) vg_assert(fbGX == NULL); + parser->ranges[parser->sp] = ranges; + parser->level[parser->sp] = level; + parser->isFunc[parser->sp] = isFunc; + parser->fbGX[parser->sp] = fbGX; + if (td3) + varstack_show( parser, "after push" ); +} + + +/* cts, ctsSzB, ctsMemSzB are derived from a DW_AT_location and so + refer either to a location expression or to a location list. + Figure out which, and in both cases bundle the expression or + location list into a so-called GExpr (guarded expression). 
*/ +__attribute__((noinline)) +static GExpr* get_GX ( CUConst* cc, Bool td3, + ULong cts, Int ctsSzB, UWord ctsMemSzB ) +{ + GExpr* gexpr = NULL; + if (ctsMemSzB > 0 && ctsSzB == 0) { + /* represents an in-line location expression, and cts points + right at it */ + gexpr = make_singleton_GX( (UChar*)(UWord)cts, ctsMemSzB ); + } + else + if (ctsMemSzB == 0 && ctsSzB > 0) { + /* represents location list. cts is the offset of it in + .debug_loc. */ + if (!cc->cu_svma_known) + cc->barf("get_GX: location list, but CU svma is unknown"); + gexpr = make_general_GX( cc, td3, (UWord)cts, cc->cu_svma ); + } + else { + vg_assert(0); /* else caller is bogus */ + } + return gexpr; +} + + +static +void read_filename_table( /*MOD*/D3VarParser* parser, + CUConst* cc, UWord debug_line_offset, + Bool td3 ) +{ + Bool is_dw64; + Cursor c; + Word i; + UShort version; + UChar opcode_base; + UChar* str; + + vg_assert(parser && cc && cc->barf); + if ((!cc->debug_line_img) + || cc->debug_line_sz <= debug_line_offset) + cc->barf("read_filename_table: .debug_line is missing?"); + + init_Cursor( &c, cc->debug_line_img, + cc->debug_line_sz, debug_line_offset, cc->barf, + "Overrun whilst reading .debug_line section(1)" ); + + /* unit_length = */ + get_Initial_Length( &is_dw64, &c, + "read_filename_table: invalid initial-length field" ); + version = get_UShort( &c ); + if (version != 2) + cc->barf("read_filename_table: Only DWARF version 2 line info " + "is currently supported."); + /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 ); + /*minimum_instruction_length = */ get_UChar( &c ); + /*default_is_stmt = */ get_UChar( &c ); + /*line_base = (Char)*/ get_UChar( &c ); + /*line_range = */ get_UChar( &c ); + opcode_base = get_UChar( &c ); + /* skip over "standard_opcode_lengths" */ + for (i = 1; i < (Word)opcode_base; i++) + (void)get_UChar( &c ); + + /* skip over the directory names table */ + while (peek_UChar(&c) != 0) { + (void)get_AsciiZ(&c); + } + (void)get_UChar(&c); /* skip 
terminating zero */ + + /* Read and record the file names table */ + vg_assert(parser->filenameTable); + vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 ); + /* Add a dummy index-zero entry. DWARF3 numbers its files + from 1, for some reason. */ + str = ML_(addStr)( cc->di, "<unknown_file>", -1 ); + VG_(addToXA)( parser->filenameTable, &str ); + while (peek_UChar(&c) != 0) { + str = get_AsciiZ(&c); + TRACE_D3(" read_filename_table: %ld %s\n", + VG_(sizeXA)(parser->filenameTable), str); + str = ML_(addStr)( cc->di, str, -1 ); + VG_(addToXA)( parser->filenameTable, &str ); + (void)get_ULEB128( &c ); /* skip directory index # */ + (void)get_ULEB128( &c ); /* skip last mod time */ + (void)get_ULEB128( &c ); /* file size */ + } + /* We're done! The rest of it is not interesting. */ +} + + +__attribute__((noinline)) +static void parse_var_DIE ( + /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, + /*MOD*/XArray* /* of TempVar* */ tempvars, + /*MOD*/XArray* /* of GExpr* */ gexprs, + /*MOD*/D3VarParser* parser, + DW_TAG dtag, + UWord posn, + Int level, + Cursor* c_die, + Cursor* c_abbv, + CUConst* cc, + Bool td3 +) +{ + ULong cts; + Int ctsSzB; + UWord ctsMemSzB; + + UWord saved_die_c_offset = get_position_of_Cursor( c_die ); + UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); + + varstack_preen( parser, td3, level-1 ); + + if (dtag == DW_TAG_compile_unit) { + Bool have_lo = False; + Bool have_hi1 = False; + Bool have_range = False; + Addr ip_lo = 0; + Addr ip_hi1 = 0; + Addr rangeoff = 0; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_low_pc && ctsSzB > 0) { + ip_lo = cts; + have_lo = True; + } + if (attr == DW_AT_high_pc && ctsSzB > 0) { + ip_hi1 = cts; + have_hi1 = True; + } + if (attr == DW_AT_ranges && ctsSzB > 0) { + rangeoff = cts; + 
have_range = True; + } + if (attr == DW_AT_stmt_list && ctsSzB > 0) { + read_filename_table( parser, cc, (UWord)cts, td3 ); + } + } + /* Now, does this give us an opportunity to find this + CU's svma? */ +#if 0 + if (level == 0 && have_lo) { + vg_assert(!cc->cu_svma_known); /* if this fails, it must be + because we've already seen a DW_TAG_compile_unit DIE at level + 0. But that can't happen, because DWARF3 only allows exactly + one top level DIE per CU. */ + cc->cu_svma_known = True; + cc->cu_svma = ip_lo; + if (1) + TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma); + /* Now, it may be that this DIE doesn't tell us the CU's + SVMA, by way of not having a DW_AT_low_pc. That's OK -- + the CU doesn't *have* to have its SVMA specified. + + But as per last para D3 spec sec 3.1.1 ("Normal and + Partial Compilation Unit Entries", "If the base address + (viz, the SVMA) is undefined, then any DWARF entry of + structure defined interms of the base address of that + compilation unit is not valid.". So that means, if whilst + processing the children of this top level DIE (or their + children, etc) we see a DW_AT_range, and cu_svma_known is + False, then the DIE that contains it is (per the spec) + invalid, and we can legitimately stop and complain. */ + } +#else + /* .. whereas The Reality is, simply assume the SVMA is zero + if it isn't specified. */ + if (level == 0) { + vg_assert(!cc->cu_svma_known); + cc->cu_svma_known = True; + if (have_lo) + cc->cu_svma = ip_lo; + else + cc->cu_svma = 0; + } +#endif + /* Do we have something that looks sane? 
*/ + if (have_lo && have_hi1 && (!have_range)) { + if (ip_lo < ip_hi1) + varstack_push( cc, parser, td3, + unitary_range_list(ip_lo, ip_hi1 - 1), + level, + False/*isFunc*/, NULL/*fbGX*/ ); + } else + if ((!have_lo) && (!have_hi1) && have_range) { + varstack_push( cc, parser, td3, + get_range_list( cc, td3, + rangeoff, cc->cu_svma ), + level, + False/*isFunc*/, NULL/*fbGX*/ ); + } else + if ((!have_lo) && (!have_hi1) && (!have_range)) { + /* CU has no code, presumably? */ + varstack_push( cc, parser, td3, + empty_range_list(), + level, + False/*isFunc*/, NULL/*fbGX*/ ); + } else + if (have_lo && (!have_hi1) && have_range && ip_lo == 0) { + /* broken DIE created by gcc-4.3.X ? Ignore the + apparently-redundant DW_AT_low_pc and use the DW_AT_ranges + instead. */ + varstack_push( cc, parser, td3, + get_range_list( cc, td3, + rangeoff, cc->cu_svma ), + level, + False/*isFunc*/, NULL/*fbGX*/ ); + } else { + if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n", + (Int)have_lo, (Int)have_hi1, (Int)have_range); + goto bad_DIE; + } + } + + if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) { + Bool have_lo = False; + Bool have_hi1 = False; + Bool have_range = False; + Addr ip_lo = 0; + Addr ip_hi1 = 0; + Addr rangeoff = 0; + Bool isFunc = dtag == DW_TAG_subprogram; + GExpr* fbGX = NULL; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_low_pc && ctsSzB > 0) { + ip_lo = cts; + have_lo = True; + } + if (attr == DW_AT_high_pc && ctsSzB > 0) { + ip_hi1 = cts; + have_hi1 = True; + } + if (attr == DW_AT_ranges && ctsSzB > 0) { + rangeoff = cts; + have_range = True; + } + if (isFunc + && attr == DW_AT_frame_base + && ((ctsMemSzB > 0 && ctsSzB == 0) + || (ctsMemSzB == 0 && ctsSzB > 0))) { + fbGX = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); + vg_assert(fbGX); + 
VG_(addToXA)(gexprs, &fbGX); + } + } + /* Do we have something that looks sane? */ + if (dtag == DW_TAG_subprogram + && (!have_lo) && (!have_hi1) && (!have_range)) { + /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry + representing a subroutine declaration that is not also a + definition does not have code address or range + attributes." */ + } else + if (dtag == DW_TAG_lexical_block + && (!have_lo) && (!have_hi1) && (!have_range)) { + /* I believe this is legit, and means the lexical block + contains no insns (whatever that might mean). Ignore. */ + } else + if (have_lo && have_hi1 && (!have_range)) { + /* This scope supplies just a single address range. */ + if (ip_lo < ip_hi1) + varstack_push( cc, parser, td3, + unitary_range_list(ip_lo, ip_hi1 - 1), + level, isFunc, fbGX ); + } else + if ((!have_lo) && (!have_hi1) && have_range) { + /* This scope supplies multiple address ranges via the use of + a range list. */ + varstack_push( cc, parser, td3, + get_range_list( cc, td3, + rangeoff, cc->cu_svma ), + level, isFunc, fbGX ); + } else + if (have_lo && (!have_hi1) && (!have_range)) { + /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block + Entries) says fairly clearly that a scope must have either + _range or (_low_pc and _high_pc). */ + /* The spec is a bit ambiguous though. Perhaps a single byte + range is intended? 
See sec 2.17 (Code Addresses And Ranges) */ + /* This case is here because icc9 produced this: + <2><13bd>: DW_TAG_lexical_block + DW_AT_decl_line : 5229 + DW_AT_decl_column : 37 + DW_AT_decl_file : 1 + DW_AT_low_pc : 0x401b03 + */ + /* Ignore (seems safe than pushing a single byte range) */ + } else + goto bad_DIE; + } + + if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) { + UChar* name = NULL; + UWord typeR = D3_INVALID_CUOFF; + Bool external = False; + GExpr* gexpr = NULL; + Int n_attrs = 0; + UWord abs_ori = (UWord)D3_INVALID_CUOFF; + Int lineNo = 0; + UChar* fileName = NULL; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + n_attrs++; + if (attr == DW_AT_name && ctsMemSzB > 0) { + name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 ); + } + if (attr == DW_AT_location + && ((ctsMemSzB > 0 && ctsSzB == 0) + || (ctsMemSzB == 0 && ctsSzB > 0))) { + gexpr = get_GX( cc, False/*td3*/, cts, ctsSzB, ctsMemSzB ); + vg_assert(gexpr); + VG_(addToXA)(gexprs, &gexpr); + } + if (attr == DW_AT_type && ctsSzB > 0) { + typeR = (UWord)cts; + } + if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) { + external = True; + } + if (attr == DW_AT_abstract_origin && ctsSzB > 0) { + abs_ori = (UWord)cts; + } + if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { + /*declaration = True;*/ + } + if (attr == DW_AT_decl_line && ctsSzB > 0) { + lineNo = (Int)cts; + } + if (attr == DW_AT_decl_file && ctsSzB > 0) { + Int ftabIx = (Int)cts; + if (ftabIx >= 1 + && ftabIx < VG_(sizeXA)( parser->filenameTable )) { + fileName = *(UChar**) + VG_(indexXA)( parser->filenameTable, ftabIx ); + vg_assert(fileName); + } + if (0) VG_(printf)("XXX filename = %s\n", fileName); + } + } + /* We'll collect it under if one of the following three + conditions holds: + (1) has location and type -> completed + (2) 
has type only -> is an abstract instance + (3) has location and abs_ori -> is a concrete instance + Name, filename and line number are all optional frills. + */ + if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) + /* 2 */ || (typeR != D3_INVALID_CUOFF) + /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) { + + /* Add this variable to the list of interesting looking + variables. Crucially, note along with it the address + range(s) associated with the variable, which for locals + will be the address ranges at the top of the varparser's + stack. */ + GExpr* fbGX = NULL; + Word i, nRanges; + XArray* /* of AddrRange */ xa; + TempVar* tv; + /* Stack can't be empty; we put a dummy entry on it for the + entire address range before starting with the DIEs for + this CU. */ + vg_assert(parser->sp >= 0); + + /* If this is a local variable (non-external), try to find + the GExpr for the DW_AT_frame_base of the containing + function. It should have been pushed on the stack at the + time we encountered its DW_TAG_subprogram DIE, so the way + to find it is to scan back down the stack looking for it. + If there isn't an enclosing stack entry marked 'isFunc' + then we must be seeing variable or formal param DIEs + outside of a function, so we deem the Dwarf to be + malformed if that happens. Note that the fbGX may be NULL + if the containing DT_TAG_subprogram didn't supply a + DW_AT_frame_base -- that's OK, but there must actually be + a containing DW_TAG_subprogram. */ + if (!external) { + Bool found = False; + for (i = parser->sp; i >= 0; i--) { + if (parser->isFunc[i]) { + fbGX = parser->fbGX[i]; + found = True; + break; + } + } + if (!found) { + if (0 && VG_(clo_verbosity) >= 0) { + VG_(message)(Vg_DebugMsg, + "warning: parse_var_DIE: non-external variable " + "outside DW_TAG_subprogram"); + } + /* goto bad_DIE; */ + /* This seems to happen a lot. 
Just ignore it -- if, + when we come to evaluation of the location (guarded) + expression, it requires a frame base value, and + there's no expression for that, then evaluation as a + whole will fail. Harmless - a bit of a waste of + cycles but nothing more. */ + } + } + + /* re "external ? 0 : parser->sp" (twice), if the var is + marked 'external' then we must put it at the global scope, + as only the global scope (level 0) covers the entire PC + address space. It is asserted elsewhere that level 0 + always covers the entire address space. */ + xa = parser->ranges[external ? 0 : parser->sp]; + nRanges = VG_(sizeXA)(xa); + vg_assert(nRanges >= 0); + + tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) ); + tv->name = name; + tv->level = external ? 0 : parser->sp; + tv->typeR = typeR; + tv->gexpr = gexpr; + tv->fbGX = fbGX; + tv->fName = fileName; + tv->fLine = lineNo; + tv->dioff = posn; + tv->absOri = abs_ori; + + /* See explanation on definition of type TempVar for the + reason for this elaboration. */ + tv->nRanges = nRanges; + tv->rngOneMin = 0; + tv->rngOneMax = 0; + tv->rngMany = NULL; + if (nRanges == 1) { + AddrRange* range = VG_(indexXA)(xa, 0); + tv->rngOneMin = range->aMin; + tv->rngOneMax = range->aMax; + } + else if (nRanges > 1) { + /* See if we already have a range list which is + structurally identical. If so, use that; if not, clone + this one, and add it to our collection. 
*/ + UWord keyW, valW; + if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) { + XArray* old = (XArray*)keyW; + tl_assert(valW == 0); + tl_assert(old != xa); + tv->rngMany = old; + } else { + XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa ); + tv->rngMany = cloned; + VG_(addToFM)( rangestree, (UWord)cloned, 0 ); + } + } + + VG_(addToXA)( tempvars, &tv ); + + TRACE_D3(" Recording this variable, with %ld PC range(s)\n", + VG_(sizeXA)(xa) ); + /* collect stats on how effective the ->ranges special + casing is */ + if (0) { + static Int ntot=0, ngt=0; + ntot++; + if (tv->rngMany) ngt++; + if (0 == (ntot % 100000)) + VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt); + } + + } + + /* Here are some other weird cases seen in the wild: + + We have a variable with a name and a type, but no + location. I guess that's a sign that it has been + optimised away. Ignore it. Here's an example: + + static Int lc_compar(void* n1, void* n2) { + MC_Chunk* mc1 = *(MC_Chunk**)n1; + MC_Chunk* mc2 = *(MC_Chunk**)n2; + return (mc1->data < mc2->data ? -1 : 1); + } + + Both mc1 and mc2 are like this + <2><5bc>: Abbrev Number: 21 (DW_TAG_variable) + DW_AT_name : mc1 + DW_AT_decl_file : 1 + DW_AT_decl_line : 216 + DW_AT_type : <5d3> + + whereas n1 and n2 do have locations specified. + + --------------------------------------------- + + We see a DW_TAG_formal_parameter with a type, but + no name and no location. It's probably part of a function type + construction, thusly, hence ignore it: + <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type) + DW_AT_sibling : <2c9> + DW_AT_prototyped : 1 + DW_AT_type : <114> + <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter) + DW_AT_type : <13e> + <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter) + DW_AT_type : <133> + + --------------------------------------------- + + Is very minimal, like this: + <4><81d>: Abbrev Number: 44 (DW_TAG_variable) + DW_AT_abstract_origin: <7ba> + What that signifies I have no idea. Ignore. 
+ + ---------------------------------------------- + + Is very minimal, like this: + <200f>: DW_TAG_formal_parameter + DW_AT_abstract_ori: <1f4c> + DW_AT_location : 13440 + What that signifies I have no idea. Ignore. + It might be significant, though: the variable at least + has a location and so might exist somewhere. + Maybe we should handle this. + + --------------------------------------------- + + <22407>: DW_TAG_variable + DW_AT_name : (indirect string, offset: 0x6579): + vgPlain_trampoline_stuff_start + DW_AT_decl_file : 29 + DW_AT_decl_line : 56 + DW_AT_external : 1 + DW_AT_declaration : 1 + + Nameless and typeless variable that has a location? Who + knows. Not me. + <2><3d178>: Abbrev Number: 22 (DW_TAG_variable) + DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0 + (DW_OP_addr: 3813c7c0) + + No, really. Check it out. gcc is quite simply borked. + <3><168cc>: Abbrev Number: 141 (DW_TAG_variable) + // followed by no attributes, and the next DIE is a sibling, + // not a child + */ + } + return; + + bad_DIE: + set_position_of_Cursor( c_die, saved_die_c_offset ); + set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); + VG_(printf)("\nparse_var_DIE: confused by:\n"); + VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); + /* Get the form contents, so as to print them */ + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, True, form ); + VG_(printf)("\t\n"); + } + VG_(printf)("\n"); + cc->barf("parse_var_DIE: confused by the above DIE"); + /*NOTREACHED*/ +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Parsing of type-related DIEs ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +#define N_D3_TYPE_STACK 16 + +typedef + struct { + /* What source language? 
'C'=C/C++, 'F'=Fortran, '?'=other + Established once per compilation unit. */ + UChar language; + /* A stack of types which are currently under construction */ + Int sp; /* [sp] is innermost active entry; sp==-1 for empty + stack */ + /* Note that the TyEnts in qparentE are temporary copies of the + ones accumulating in the main tyent array. So it is not safe + to free up anything on them when popping them off the stack + (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just + memset them to zero when done. */ + TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */ + Int qlevel[N_D3_TYPE_STACK]; + + } + D3TypeParser; + +static void typestack_show ( D3TypeParser* parser, HChar* str ) { + Word i; + VG_(printf)(" typestack (%s) {\n", str); + for (i = 0; i <= parser->sp; i++) { + VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]); + ML_(pp_TyEnt)( &parser->qparentE[i] ); + VG_(printf)("\n"); + } + VG_(printf)(" }\n"); +} + +/* Remove from the stack, all entries with .level > 'level' */ +static +void typestack_preen ( D3TypeParser* parser, Bool td3, Int level ) +{ + Bool changed = False; + vg_assert(parser->sp < N_D3_TYPE_STACK); + while (True) { + vg_assert(parser->sp >= -1); + if (parser->sp == -1) break; + if (parser->qlevel[parser->sp] <= level) break; + if (0) + TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1); + vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); + VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt)); + parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF; + parser->qparentE[parser->sp].tag = Te_EMPTY; + parser->qlevel[parser->sp] = 0; + parser->sp--; + changed = True; + } + if (changed && td3) + typestack_show( parser, "after preen" ); +} + +static Bool typestack_is_empty ( D3TypeParser* parser ) { + vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK); + return parser->sp == -1; +} + +static void typestack_push ( CUConst* cc, + D3TypeParser* parser, + Bool td3, + TyEnt* parentE, Int level ) { + 
if (0) + TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n", + parser->sp+1, level, parentE->cuOff); + + /* First we need to zap everything >= 'level', as we are about to + replace any previous entry at 'level', so .. */ + typestack_preen(parser, /*td3*/False, level-1); + + vg_assert(parser->sp >= -1); + vg_assert(parser->sp < N_D3_TYPE_STACK); + if (parser->sp == N_D3_TYPE_STACK-1) + cc->barf("typestack_push: N_D3_TYPE_STACK is too low; " + "increase and recompile"); + if (parser->sp >= 0) + vg_assert(parser->qlevel[parser->sp] < level); + parser->sp++; + vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY); + vg_assert(parser->qlevel[parser->sp] == 0); + vg_assert(parentE); + vg_assert(ML_(TyEnt__is_type)(parentE)); + vg_assert(parentE->cuOff != D3_INVALID_CUOFF); + parser->qparentE[parser->sp] = *parentE; + parser->qlevel[parser->sp] = level; + if (td3) + typestack_show( parser, "after push" ); +} + + +/* Parse a type-related DIE. 'parser' holds the current parser state. + 'admin' is where the completed types are dumped. 'dtag' is the tag + for this DIE. 'c_die' points to the start of the data fields (FORM + stuff) for the DIE. c_abbv points to the start of the (name,form) + pairs which describe the DIE. + + We may find the DIE uninteresting, in which case we should ignore + it. + + What happens: the DIE is examined. If uninteresting, it is ignored. + Otherwise, the DIE gives rise to two things: + + (1) the offset of this DIE in the CU -- the cuOffset, a UWord + (2) a TyAdmin structure, which holds the type, or related stuff + + (2) is added at the end of 'tyadmins', at some index, say 'i'. + + A pair (cuOffset, i) is added to 'tydict'. + + Hence 'tyadmins' holds the actual type entities, and 'tydict' holds + a mapping from cuOffset to the index of the corresponding entry in + 'tyadmin'. + + When resolving a cuOffset to a TyAdmin, first look up the cuOffset + in the tydict (by binary search). 
This gives an index into + tyadmins, and the required entity lives in tyadmins at that index. +*/ +__attribute__((noinline)) +static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents, + /*MOD*/D3TypeParser* parser, + DW_TAG dtag, + UWord posn, + Int level, + Cursor* c_die, + Cursor* c_abbv, + CUConst* cc, + Bool td3 ) +{ + ULong cts; + Int ctsSzB; + UWord ctsMemSzB; + TyEnt typeE; + TyEnt atomE; + TyEnt fieldE; + TyEnt boundE; + + UWord saved_die_c_offset = get_position_of_Cursor( c_die ); + UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv ); + + VG_(memset)( &typeE, 0xAA, sizeof(typeE) ); + VG_(memset)( &atomE, 0xAA, sizeof(atomE) ); + VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) ); + VG_(memset)( &boundE, 0xAA, sizeof(boundE) ); + + /* If we've returned to a level at or above any previously noted + parent, un-note it, so we don't believe we're still collecting + its children. */ + typestack_preen( parser, td3, level-1 ); + + if (dtag == DW_TAG_compile_unit) { + /* See if we can find DW_AT_language, since it is important for + establishing array bounds (see DW_TAG_subrange_type below in + this fn) */ + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr != DW_AT_language) + continue; + if (ctsSzB == 0) + goto bad_DIE; + switch (cts) { + case DW_LANG_C89: case DW_LANG_C: + case DW_LANG_C_plus_plus: case DW_LANG_ObjC: + case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC: + case DW_LANG_Upc: + parser->language = 'C'; break; + case DW_LANG_Fortran77: case DW_LANG_Fortran90: + case DW_LANG_Fortran95: + parser->language = 'F'; break; + case DW_LANG_Ada83: case DW_LANG_Cobol74: + case DW_LANG_Cobol85: case DW_LANG_Pascal83: + case DW_LANG_Modula2: case DW_LANG_Java: + case DW_LANG_C99: case DW_LANG_Ada95: + case DW_LANG_PLI: case DW_LANG_D: + case DW_LANG_Mips_Assembler: + 
parser->language = '?'; break; + default: + goto bad_DIE; + } + } + } + + if (dtag == DW_TAG_base_type) { + /* We can pick up a new base type any time. */ + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = D3_INVALID_CUOFF; + typeE.tag = Te_TyBase; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_name && ctsMemSzB > 0) { + typeE.Te.TyBase.name + = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1", + (UChar*)(UWord)cts ); + } + if (attr == DW_AT_byte_size && ctsSzB > 0) { + typeE.Te.TyBase.szB = cts; + } + if (attr == DW_AT_encoding && ctsSzB > 0) { + switch (cts) { + case DW_ATE_unsigned: case DW_ATE_unsigned_char: + case DW_ATE_boolean:/* FIXME - is this correct? */ + typeE.Te.TyBase.enc = 'U'; break; + case DW_ATE_signed: case DW_ATE_signed_char: + typeE.Te.TyBase.enc = 'S'; break; + case DW_ATE_float: + typeE.Te.TyBase.enc = 'F'; break; + case DW_ATE_complex_float: + typeE.Te.TyBase.enc = 'C'; break; + default: + goto bad_DIE; + } + } + } + + /* Invent a name if it doesn't have one. gcc-4.3 + -ftree-vectorize is observed to emit nameless base types. */ + if (!typeE.Te.TyBase.name) + typeE.Te.TyBase.name + = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2", + "<anon_base_type>" ); + + /* Do we have something that looks sane? */ + if (/* must have a name */ + typeE.Te.TyBase.name == NULL + /* and a plausible size. 
Yes, really 32: "complex long + double" apparently has size=32 */ + || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32 + /* and a plausible encoding */ + || (typeE.Te.TyBase.enc != 'U' + && typeE.Te.TyBase.enc != 'S' + && typeE.Te.TyBase.enc != 'F' + && typeE.Te.TyBase.enc != 'C')) + goto bad_DIE; + /* Last minute hack: if we see this + <1><515>: DW_TAG_base_type + DW_AT_byte_size : 0 + DW_AT_encoding : 5 + DW_AT_name : void + convert it into a real Void type. */ + if (typeE.Te.TyBase.szB == 0 + && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) { + ML_(TyEnt__make_EMPTY)(&typeE); + typeE.tag = Te_TyVoid; + typeE.Te.TyVoid.isFake = False; /* it's a real one! */ + } + + goto acquire_Type; + } + + if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type + || dtag == DW_TAG_ptr_to_member_type) { + /* This seems legit for _pointer_type and _reference_type. I + don't know if rolling _ptr_to_member_type in here really is + legit, but it's better than not handling it at all. */ + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = D3_INVALID_CUOFF; + typeE.tag = Te_TyPorR; + /* target type defaults to void */ + typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF; + typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type + || dtag == DW_TAG_ptr_to_member_type; + /* These three type kinds don't *have* to specify their size, in + which case we assume it's a machine word. But if they do + specify it, it must be a machine word :-) This probably + assumes that the word size of the Dwarf3 we're reading is the + same size as that on the machine. gcc appears to give a size + whereas icc9 doesn't. 
*/ + typeE.Te.TyPorR.szB = sizeof(UWord); + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_byte_size && ctsSzB > 0) { + typeE.Te.TyPorR.szB = cts; + } + if (attr == DW_AT_type && ctsSzB > 0) { + typeE.Te.TyPorR.typeR = (UWord)cts; + } + } + /* Do we have something that looks sane? */ + if (typeE.Te.TyPorR.szB != sizeof(UWord)) + goto bad_DIE; + else + goto acquire_Type; + } + + if (dtag == DW_TAG_enumeration_type) { + /* Create a new Type to hold the results. */ + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = posn; + typeE.tag = Te_TyEnum; + typeE.Te.TyEnum.atomRs + = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", + ML_(dinfo_free), + sizeof(UWord) ); + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_name && ctsMemSzB > 0) { + typeE.Te.TyEnum.name + = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2", + (UChar*)(UWord)cts ); + } + if (attr == DW_AT_byte_size && ctsSzB > 0) { + typeE.Te.TyEnum.szB = cts; + } + } + + if (!typeE.Te.TyEnum.name) + typeE.Te.TyEnum.name + = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3", + "<anon_enum_type>" ); + + /* Do we have something that looks sane? */ + if (typeE.Te.TyEnum.szB == 0 /* we must know the size */) + goto bad_DIE; + /* On't stack! */ + typestack_push( cc, parser, td3, &typeE, level ); + goto acquire_Type; + } + + /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces + DW_TAG_enumerator with only a DW_AT_name but no + DW_AT_const_value. This is in violation of the Dwarf3 standard, + and appears to be a new "feature" of gcc - versions 4.3.x and + earlier do not appear to do this. 
So accept DW_TAG_enumerator + which only have a name but no value. An example: + + <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type) + <181> DW_AT_name : (indirect string, offset: 0xda70): + QtMsgType + <185> DW_AT_byte_size : 4 + <186> DW_AT_decl_file : 14 + <187> DW_AT_decl_line : 1480 + <189> DW_AT_sibling : <0x1a7> + <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator) + <18e> DW_AT_name : (indirect string, offset: 0x9e18): + QtDebugMsg + <2><192>: Abbrev Number: 7 (DW_TAG_enumerator) + <193> DW_AT_name : (indirect string, offset: 0x1505f): + QtWarningMsg + <2><197>: Abbrev Number: 7 (DW_TAG_enumerator) + <198> DW_AT_name : (indirect string, offset: 0x16f4a): + QtCriticalMsg + <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator) + <19d> DW_AT_name : (indirect string, offset: 0x156dd): + QtFatalMsg + <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator) + <1a2> DW_AT_name : (indirect string, offset: 0x13660): + QtSystemMsg + */ + if (dtag == DW_TAG_enumerator) { + VG_(memset)( &atomE, 0, sizeof(atomE) ); + atomE.cuOff = posn; + atomE.tag = Te_Atom; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_name && ctsMemSzB > 0) { + atomE.Te.Atom.name + = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1", + (UChar*)(UWord)cts ); + } + if (attr == DW_AT_const_value && ctsSzB > 0) { + atomE.Te.Atom.value = cts; + atomE.Te.Atom.valueKnown = True; + } + } + /* Do we have something that looks sane? */ + if (atomE.Te.Atom.name == NULL) + goto bad_DIE; + /* Do we have a plausible parent? 
*/ + if (typestack_is_empty(parser)) goto bad_DIE; + vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); + vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); + if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; + if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE; + /* Record this child in the parent */ + vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs); + VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs, + &atomE ); + /* And record the child itself */ + goto acquire_Atom; + } + + /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I + don't know if this is correct, but it at least makes this reader + usable for gcc-4.3 produced Dwarf3. */ + if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type + || dtag == DW_TAG_union_type) { + Bool have_szB = False; + Bool is_decl = False; + Bool is_spec = False; + /* Create a new Type to hold the results. */ + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = posn; + typeE.tag = Te_TyStOrUn; + typeE.Te.TyStOrUn.name = NULL; + typeE.Te.TyStOrUn.fieldRs + = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", + ML_(dinfo_free), + sizeof(UWord) ); + typeE.Te.TyStOrUn.complete = True; + typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type + || dtag == DW_TAG_class_type; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_name && ctsMemSzB > 0) { + typeE.Te.TyStOrUn.name + = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2", + (UChar*)(UWord)cts ); + } + if (attr == DW_AT_byte_size && ctsSzB >= 0) { + typeE.Te.TyStOrUn.szB = cts; + have_szB = True; + } + if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) { + is_decl = True; + } + if (attr == DW_AT_specification && ctsSzB > 0 && cts > 0) { + is_spec = True; + } + } + /* 
Do we have something that looks sane? */ + if (is_decl && (!is_spec)) { + /* It's a DW_AT_declaration. We require the name but + nothing else. */ + if (typeE.Te.TyStOrUn.name == NULL) + goto bad_DIE; + typeE.Te.TyStOrUn.complete = False; + goto acquire_Type; + } + if ((!is_decl) /* && (!is_spec) */) { + /* this is the common, ordinary case */ + if ((!have_szB) /* we must know the size */ + /* But the name can be present, or not */) + goto bad_DIE; + /* On't stack! */ + typestack_push( cc, parser, td3, &typeE, level ); + goto acquire_Type; + } + else { + /* don't know how to handle any other variants just now */ + goto bad_DIE; + } + } + + if (dtag == DW_TAG_member) { + /* Acquire member entries for both DW_TAG_structure_type and + DW_TAG_union_type. They differ minorly, in that struct + members must have a DW_AT_data_member_location expression + whereas union members must not. */ + Bool parent_is_struct; + VG_(memset)( &fieldE, 0, sizeof(fieldE) ); + fieldE.cuOff = posn; + fieldE.tag = Te_Field; + fieldE.Te.Field.typeR = D3_INVALID_CUOFF; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_name && ctsMemSzB > 0) { + fieldE.Te.Field.name + = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1", + (UChar*)(UWord)cts ); + } + if (attr == DW_AT_type && ctsSzB > 0) { + fieldE.Te.Field.typeR = (UWord)cts; + } + if (attr == DW_AT_data_member_location && ctsMemSzB > 0) { + fieldE.Te.Field.nLoc = (UWord)ctsMemSzB; + fieldE.Te.Field.loc + = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2", + (UChar*)(UWord)cts, + (SizeT)fieldE.Te.Field.nLoc ); + } + } + /* Do we have a plausible parent? 
*/ + if (typestack_is_empty(parser)) goto bad_DIE; + vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); + vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); + if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; + if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE; + /* Do we have something that looks sane? If this a member of a + struct, we must have a location expression; but if a member + of a union that is irrelevant (D3 spec sec 5.6.6). We ought + to reject in the latter case, but some compilers have been + observed to emit constant-zero expressions. So just ignore + them. */ + parent_is_struct + = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct; + if (!fieldE.Te.Field.name) + fieldE.Te.Field.name + = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3", + "<anon_field>" ); + vg_assert(fieldE.Te.Field.name); + if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF) + goto bad_DIE; + if (parent_is_struct && (!fieldE.Te.Field.loc)) + goto bad_DIE; + if ((!parent_is_struct) && fieldE.Te.Field.loc) { + /* If this is a union type, pretend we haven't seen the data + member location expression, as it is by definition + redundant (it must be zero). 
*/ + ML_(dinfo_free)(fieldE.Te.Field.loc); + fieldE.Te.Field.loc = NULL; + fieldE.Te.Field.nLoc = 0; + } + /* Record this child in the parent */ + fieldE.Te.Field.isStruct = parent_is_struct; + vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs); + VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs, + &posn ); + /* And record the child itself */ + goto acquire_Field; + } + + if (dtag == DW_TAG_array_type) { + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = posn; + typeE.tag = Te_TyArray; + typeE.Te.TyArray.typeR = D3_INVALID_CUOFF; + typeE.Te.TyArray.boundRs + = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1", + ML_(dinfo_free), + sizeof(UWord) ); + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_type && ctsSzB > 0) { + typeE.Te.TyArray.typeR = (UWord)cts; + } + } + if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF) + goto bad_DIE; + /* On't stack! 
*/ + typestack_push( cc, parser, td3, &typeE, level ); + goto acquire_Type; + } + + if (dtag == DW_TAG_subrange_type) { + Bool have_lower = False; + Bool have_upper = False; + Bool have_count = False; + Long lower = 0; + Long upper = 0; + + switch (parser->language) { + case 'C': have_lower = True; lower = 0; break; + case 'F': have_lower = True; lower = 1; break; + case '?': have_lower = False; break; + default: vg_assert(0); /* assured us by handling of + DW_TAG_compile_unit in this fn */ + } + + VG_(memset)( &boundE, 0, sizeof(boundE) ); + boundE.cuOff = D3_INVALID_CUOFF; + boundE.tag = Te_Bound; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_lower_bound && ctsSzB > 0) { + lower = (Long)cts; + have_lower = True; + } + if (attr == DW_AT_upper_bound && ctsSzB > 0) { + upper = (Long)cts; + have_upper = True; + } + if (attr == DW_AT_count && ctsSzB > 0) { + /*count = (Long)cts;*/ + have_count = True; + } + } + /* FIXME: potentially skip the rest if no parent present, since + it could be the case that this subrange type is free-standing + (not being used to describe the bounds of a containing array + type) */ + /* Do we have a plausible parent? 
*/ + if (typestack_is_empty(parser)) goto bad_DIE; + vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp])); + vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF); + if (level != parser->qlevel[parser->sp]+1) goto bad_DIE; + if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE; + + /* Figure out if we have a definite range or not */ + if (have_lower && have_upper && (!have_count)) { + boundE.Te.Bound.knownL = True; + boundE.Te.Bound.knownU = True; + boundE.Te.Bound.boundL = lower; + boundE.Te.Bound.boundU = upper; + } + else if (have_lower && (!have_upper) && (!have_count)) { + boundE.Te.Bound.knownL = True; + boundE.Te.Bound.knownU = False; + boundE.Te.Bound.boundL = lower; + boundE.Te.Bound.boundU = 0; + } else { + /* FIXME: handle more cases */ + goto bad_DIE; + } + + /* Record this bound in the parent */ + boundE.cuOff = posn; + vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs); + VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs, + &boundE ); + /* And record the child itself */ + goto acquire_Bound; + } + + if (dtag == DW_TAG_typedef) { + /* We can pick up a new typedef any time. */ + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = D3_INVALID_CUOFF; + typeE.tag = Te_TyTyDef; + typeE.Te.TyTyDef.name = NULL; + typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_name && ctsMemSzB > 0) { + typeE.Te.TyTyDef.name + = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1", + (UChar*)(UWord)cts ); + } + if (attr == DW_AT_type && ctsSzB > 0) { + typeE.Te.TyTyDef.typeR = (UWord)cts; + } + } + /* Do we have something that looks sane? 
*/ + if (/* must have a name */ + typeE.Te.TyTyDef.name == NULL + /* but the referred-to type can be absent */) + goto bad_DIE; + else + goto acquire_Type; + } + + if (dtag == DW_TAG_subroutine_type) { + /* function type? just record that one fact and ask no + further questions. */ + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = D3_INVALID_CUOFF; + typeE.tag = Te_TyFn; + goto acquire_Type; + } + + if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) { + Int have_ty = 0; + VG_(memset)(&typeE, 0, sizeof(typeE)); + typeE.cuOff = D3_INVALID_CUOFF; + typeE.tag = Te_TyQual; + typeE.Te.TyQual.qual + = dtag == DW_TAG_volatile_type ? 'V' : 'C'; + /* target type defaults to 'void' */ + typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF; + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, False/*td3*/, form ); + if (attr == DW_AT_type && ctsSzB > 0) { + typeE.Te.TyQual.typeR = (UWord)cts; + have_ty++; + } + } + /* gcc sometimes generates DW_TAG_const/volatile_type without + DW_AT_type and GDB appears to interpret the type as 'const + void' (resp. 'volatile void'). So just allow it .. 
*/ + if (have_ty == 1 || have_ty == 0) + goto acquire_Type; + else + goto bad_DIE; + } + + /* else ignore this DIE */ + return; + /*NOTREACHED*/ + + acquire_Type: + if (0) VG_(printf)("YYYY Acquire Type\n"); + vg_assert(ML_(TyEnt__is_type)( &typeE )); + vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn); + typeE.cuOff = posn; + VG_(addToXA)( tyents, &typeE ); + return; + /*NOTREACHED*/ + + acquire_Atom: + if (0) VG_(printf)("YYYY Acquire Atom\n"); + vg_assert(atomE.tag == Te_Atom); + vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn); + atomE.cuOff = posn; + VG_(addToXA)( tyents, &atomE ); + return; + /*NOTREACHED*/ + + acquire_Field: + /* For union members, Expr should be absent */ + if (0) VG_(printf)("YYYY Acquire Field\n"); + vg_assert(fieldE.tag == Te_Field); + vg_assert( (fieldE.Te.Field.nLoc > 0 && fieldE.Te.Field.loc != NULL) + || (fieldE.Te.Field.nLoc == 0 && fieldE.Te.Field.loc == NULL) ); + if (fieldE.Te.Field.isStruct) { + vg_assert(fieldE.Te.Field.nLoc > 0); + } else { + vg_assert(fieldE.Te.Field.nLoc == 0); + } + vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn); + fieldE.cuOff = posn; + VG_(addToXA)( tyents, &fieldE ); + return; + /*NOTREACHED*/ + + acquire_Bound: + if (0) VG_(printf)("YYYY Acquire Bound\n"); + vg_assert(boundE.tag == Te_Bound); + vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn); + boundE.cuOff = posn; + VG_(addToXA)( tyents, &boundE ); + return; + /*NOTREACHED*/ + + bad_DIE: + set_position_of_Cursor( c_die, saved_die_c_offset ); + set_position_of_Cursor( c_abbv, saved_abbv_c_offset ); + VG_(printf)("\nparse_type_DIE: confused by:\n"); + VG_(printf)(" <%d><%lx>: %s\n", level, posn, ML_(pp_DW_TAG)( dtag ) ); + while (True) { + DW_AT attr = (DW_AT) get_ULEB128( c_abbv ); + DW_FORM form = (DW_FORM)get_ULEB128( c_abbv ); + if (attr == 0 && form == 0) break; + VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr)); + /* Get the form contents, so as to print them */ + 
get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c_die, True, form ); + VG_(printf)("\t\n"); + } + VG_(printf)("\n"); + cc->barf("parse_type_DIE: confused by the above DIE"); + /*NOTREACHED*/ +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Compression of type DIE information ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +static UWord chase_cuOff ( Bool* changed, + XArray* /* of TyEnt */ ents, + TyEntIndexCache* ents_cache, + UWord cuOff ) +{ + TyEnt* ent; + ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff ); + + if (!ent) { + VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff); + *changed = False; + return cuOff; + } + + vg_assert(ent->tag != Te_EMPTY); + if (ent->tag != Te_INDIR) { + *changed = False; + return cuOff; + } else { + vg_assert(ent->Te.INDIR.indR < cuOff); + *changed = True; + return ent->Te.INDIR.indR; + } +} + +static +void chase_cuOffs_in_XArray ( Bool* changed, + XArray* /* of TyEnt */ ents, + TyEntIndexCache* ents_cache, + /*MOD*/XArray* /* of UWord */ cuOffs ) +{ + Bool b2 = False; + Word i, n = VG_(sizeXA)( cuOffs ); + for (i = 0; i < n; i++) { + Bool b = False; + UWord* p = VG_(indexXA)( cuOffs, i ); + *p = chase_cuOff( &b, ents, ents_cache, *p ); + if (b) + b2 = True; + } + *changed = b2; +} + +static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents, + TyEntIndexCache* ents_cache, + /*MOD*/TyEnt* te ) +{ + Bool b, changed = False; + switch (te->tag) { + case Te_EMPTY: + break; + case Te_INDIR: + te->Te.INDIR.indR + = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR ); + if (b) changed = True; + break; + case Te_UNKNOWN: + break; + case Te_Atom: + break; + case Te_Field: + te->Te.Field.typeR + = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR ); + if (b) changed = True; + break; + case Te_Bound: + break; + case Te_TyBase: + break; + case Te_TyPorR: + te->Te.TyPorR.typeR + = chase_cuOff( &b, ents, ents_cache, 
te->Te.TyPorR.typeR ); + if (b) changed = True; + break; + case Te_TyTyDef: + te->Te.TyTyDef.typeR + = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR ); + if (b) changed = True; + break; + case Te_TyStOrUn: + chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs ); + if (b) changed = True; + break; + case Te_TyEnum: + chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs ); + if (b) changed = True; + break; + case Te_TyArray: + te->Te.TyArray.typeR + = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR ); + if (b) changed = True; + chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs ); + if (b) changed = True; + break; + case Te_TyFn: + break; + case Te_TyQual: + te->Te.TyQual.typeR + = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR ); + if (b) changed = True; + break; + case Te_TyVoid: + break; + default: + ML_(pp_TyEnt)(te); + vg_assert(0); + } + return changed; +} + +/* Make a pass over 'ents'. For each tyent, inspect the target of any + 'R' or 'Rs' fields (those which refer to other tyents), and replace + any which point to INDIR nodes with the target of the indirection + (which should not itself be an indirection). In summary, this + routine shorts out all references to indirection nodes. */ +static +Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents, + TyEntIndexCache* ents_cache ) +{ + Word i, n, nChanged = 0; + Bool b; + n = VG_(sizeXA)( ents ); + for (i = 0; i < n; i++) { + TyEnt* ent = VG_(indexXA)( ents, i ); + vg_assert(ent->tag != Te_EMPTY); + /* We have to substitute everything, even indirections, so as to + ensure that chains of indirections don't build up. */ + b = TyEnt__subst_R_fields( ents, ents_cache, ent ); + if (b) + nChanged++; + } + + return nChanged; +} + + +/* Make a pass over 'ents', building a dictionary of TyEnts as we go. + Look up each new tyent in the dictionary in turn. 
If it is already + in the dictionary, replace this tyent with an indirection to the + existing one, and delete any malloc'd stuff hanging off this one. + In summary, this routine commons up all tyents that are identical + as defined by TyEnt__cmp_by_all_except_cuOff. */ +static +Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents ) +{ + Word n, i, nDeleted; + WordFM* dict; /* TyEnt* -> void */ + TyEnt* ent; + UWord keyW, valW; + + dict = VG_(newFM)( + ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", + ML_(dinfo_free), + (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff) + ); + + nDeleted = 0; + n = VG_(sizeXA)( ents ); + for (i = 0; i < n; i++) { + ent = VG_(indexXA)( ents, i ); + vg_assert(ent->tag != Te_EMPTY); + + /* Ignore indirections, although check that they are + not forming a cycle. */ + if (ent->tag == Te_INDIR) { + vg_assert(ent->Te.INDIR.indR < ent->cuOff); + continue; + } + + keyW = valW = 0; + if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) { + /* it's already in the dictionary. */ + TyEnt* old = (TyEnt*)keyW; + vg_assert(valW == 0); + vg_assert(old != ent); + vg_assert(old->tag != Te_INDIR); + /* since we are traversing the array in increasing order of + cuOff: */ + vg_assert(old->cuOff < ent->cuOff); + /* So anyway, dump this entry and replace it with an + indirection to the one in the dictionary. Note that the + assertion above guarantees that we cannot create cycles of + indirections, since we are always creating an indirection + to a tyent with a cuOff lower than this one. */ + ML_(TyEnt__make_EMPTY)( ent ); + ent->tag = Te_INDIR; + ent->Te.INDIR.indR = old->cuOff; + nDeleted++; + } else { + /* not in dictionary; add it and keep going. 
*/ + VG_(addToFM)( dict, (UWord)ent, 0 ); + } + } + + VG_(deleteFM)( dict, NULL, NULL ); + + return nDeleted; +} + + +static +void dedup_types ( Bool td3, + /*MOD*/XArray* /* of TyEnt */ ents, + TyEntIndexCache* ents_cache ) +{ + Word m, n, i, nDel, nSubst, nThresh; + if (0) td3 = True; + + n = VG_(sizeXA)( ents ); + + /* If a commoning pass and a substitution pass both make fewer than + this many changes, just stop. It's pointless to burn up CPU + time trying to compress the last 1% or so out of the array. */ + nThresh = n / 200; + + /* First we must sort .ents by its .cuOff fields, so we + can index into it. */ + VG_(setCmpFnXA)( + ents, + (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) + ); + VG_(sortXA)( ents ); + + /* Now repeatedly do commoning and substitution passes over + the array, until there are no more changes. */ + do { + nDel = dedup_types_commoning_pass ( ents ); + nSubst = dedup_types_substitution_pass ( ents, ents_cache ); + vg_assert(nDel >= 0 && nSubst >= 0); + TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst); + } while (nDel > nThresh || nSubst > nThresh); + + /* Sanity check: all INDIR nodes should point at a non-INDIR thing. + In fact this should be true at the end of every loop iteration + above (a commoning pass followed by a substitution pass), but + checking it on every iteration is excessively expensive. Note, + this loop also computes 'm' for the stats printing below it. 
*/ + m = 0; + n = VG_(sizeXA)( ents ); + for (i = 0; i < n; i++) { + TyEnt *ent, *ind; + ent = VG_(indexXA)( ents, i ); + if (ent->tag != Te_INDIR) continue; + m++; + ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, + ent->Te.INDIR.indR ); + vg_assert(ind); + vg_assert(ind->tag != Te_INDIR); + } + + TRACE_D3("Overall: %ld before, %ld after\n", n, n-m); +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Resolution of references to type DIEs ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* Make a pass through the (temporary) variables array. Examine the + type of each variable, check is it found, and chase any Te_INDIRs. + Postcondition is: each variable has a typeR field that refers to a + valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed + not to refer to a Te_INDIR. (This is so that we can throw all the + Te_INDIRs away later). */ + +__attribute__((noinline)) +static void resolve_variable_types ( + void (*barf)( HChar* ) __attribute__((noreturn)), + /*R-O*/XArray* /* of TyEnt */ ents, + /*MOD*/TyEntIndexCache* ents_cache, + /*MOD*/XArray* /* of TempVar* */ vars + ) +{ + Word i, n; + n = VG_(sizeXA)( vars ); + for (i = 0; i < n; i++) { + TempVar* var = *(TempVar**)VG_(indexXA)( vars, i ); + /* This is the stated type of the variable. But it might be + an indirection, so be careful. */ + TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, + var->typeR ); + if (ent && ent->tag == Te_INDIR) { + ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, + ent->Te.INDIR.indR ); + vg_assert(ent); + vg_assert(ent->tag != Te_INDIR); + } + + /* Deal first with "normal" cases */ + if (ent && ML_(TyEnt__is_type)(ent)) { + var->typeR = ent->cuOff; + continue; + } + + /* If there's no ent, it probably we did not manage to read a + type at the cuOffset which is stated as being this variable's + type. Maybe a deficiency in parse_type_DIE. Complain. 
*/ + if (ent == NULL) { + VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR ); + barf("resolve_variable_types: " + "cuOff does not refer to a known type"); + } + vg_assert(ent); + /* If ent has any other tag, something bad happened, along the + lines of var->typeR not referring to a type at all. */ + vg_assert(ent->tag == Te_UNKNOWN); + /* Just accept it; the type will be useless, but at least keep + going. */ + var->typeR = ent->cuOff; + } +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Parsing of Compilation Units ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +static Int cmp_TempVar_by_dioff ( void* v1, void* v2 ) { + TempVar* t1 = *(TempVar**)v1; + TempVar* t2 = *(TempVar**)v2; + if (t1->dioff < t2->dioff) return -1; + if (t1->dioff > t2->dioff) return 1; + return 0; +} + +static void read_DIE ( + /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree, + /*MOD*/XArray* /* of TyEnt */ tyents, + /*MOD*/XArray* /* of TempVar* */ tempvars, + /*MOD*/XArray* /* of GExpr* */ gexprs, + /*MOD*/D3TypeParser* typarser, + /*MOD*/D3VarParser* varparser, + Cursor* c, Bool td3, CUConst* cc, Int level +) +{ + Cursor abbv; + ULong atag, abbv_code; + UWord posn; + UInt has_children; + UWord start_die_c_offset, start_abbv_c_offset; + UWord after_die_c_offset, after_abbv_c_offset; + + /* --- Deal with this DIE --- */ + posn = get_position_of_Cursor( c ); + abbv_code = get_ULEB128( c ); + set_abbv_Cursor( &abbv, td3, cc, abbv_code ); + atag = get_ULEB128( &abbv ); + TRACE_D3("\n"); + TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n", + level, posn, abbv_code, ML_(pp_DW_TAG)( atag ) ); + + if (atag == 0) + cc->barf("read_DIE: invalid zero tag on DIE"); + + has_children = get_UChar( &abbv ); + if (has_children != DW_children_no && has_children != DW_children_yes) + cc->barf("read_DIE: invalid has_children value"); + + /* We're set up to look at the fields of this DIE. 
Hand it off to + any parser(s) that want to see it. Since they will in general + advance both the DIE and abbrev cursors, remember their current + settings so that we can then back up and do one final pass over + the DIE, to print out its contents. */ + + start_die_c_offset = get_position_of_Cursor( c ); + start_abbv_c_offset = get_position_of_Cursor( &abbv ); + + while (True) { + ULong cts; + Int ctsSzB; + UWord ctsMemSzB; + ULong at_name = get_ULEB128( &abbv ); + ULong at_form = get_ULEB128( &abbv ); + if (at_name == 0 && at_form == 0) break; + TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name)); + /* Get the form contents, but ignore them; the only purpose is + to print them, if td3 is True */ + get_Form_contents( &cts, &ctsSzB, &ctsMemSzB, + cc, c, td3, (DW_FORM)at_form ); + TRACE_D3("\t"); + TRACE_D3("\n"); + } + + after_die_c_offset = get_position_of_Cursor( c ); + after_abbv_c_offset = get_position_of_Cursor( &abbv ); + + set_position_of_Cursor( c, start_die_c_offset ); + set_position_of_Cursor( &abbv, start_abbv_c_offset ); + + parse_type_DIE( tyents, + typarser, + (DW_TAG)atag, + posn, + level, + c, /* DIE cursor */ + &abbv, /* abbrev cursor */ + cc, + td3 ); + + set_position_of_Cursor( c, start_die_c_offset ); + set_position_of_Cursor( &abbv, start_abbv_c_offset ); + + parse_var_DIE( rangestree, + tempvars, + gexprs, + varparser, + (DW_TAG)atag, + posn, + level, + c, /* DIE cursor */ + &abbv, /* abbrev cursor */ + cc, + td3 ); + + set_position_of_Cursor( c, after_die_c_offset ); + set_position_of_Cursor( &abbv, after_abbv_c_offset ); + + /* --- Now recurse into its children, if any --- */ + if (has_children == DW_children_yes) { + if (0) TRACE_D3("BEGIN children of level %d\n", level); + while (True) { + atag = peek_ULEB128( c ); + if (atag == 0) break; + read_DIE( rangestree, tyents, tempvars, gexprs, + typarser, varparser, + c, td3, cc, level+1 ); + } + /* Now we need to eat the terminating zero */ + atag = get_ULEB128( c ); + vg_assert(atag == 0); + if (0) 
TRACE_D3("END children of level %d\n", level); + } + +} + + +static +void new_dwarf3_reader_wrk ( + struct _DebugInfo* di, + __attribute__((noreturn)) void (*barf)( HChar* ), + UChar* debug_info_img, SizeT debug_info_sz, + UChar* debug_abbv_img, SizeT debug_abbv_sz, + UChar* debug_line_img, SizeT debug_line_sz, + UChar* debug_str_img, SizeT debug_str_sz, + UChar* debug_ranges_img, SizeT debug_ranges_sz, + UChar* debug_loc_img, SizeT debug_loc_sz +) +{ + XArray* /* of TyEnt */ tyents; + XArray* /* of TyEnt */ tyents_to_keep; + XArray* /* of GExpr* */ gexprs; + XArray* /* of TempVar* */ tempvars; + WordFM* /* of (XArray* of AddrRange, void) */ rangestree; + TyEntIndexCache* tyents_cache = NULL; + TyEntIndexCache* tyents_to_keep_cache = NULL; + TempVar *varp, *varp2; + GExpr* gexpr; + Cursor abbv; /* for showing .debug_abbrev */ + Cursor info; /* primary cursor for parsing .debug_info */ + Cursor ranges; /* for showing .debug_ranges */ + D3TypeParser typarser; + D3VarParser varparser; + Addr dr_base; + UWord dr_offset; + Word i, j, n; + Bool td3 = di->trace_symtab; + XArray* /* of TempVar* */ dioff_lookup_tab; +#if 0 + /* This doesn't work properly because it assumes all entries are + packed end to end, with no holes. But that doesn't always + appear to be the case, so it loses sync. And the D3 spec + doesn't appear to require a no-hole situation either. */ + /* Display .debug_loc */ + Addr dl_base; + UWord dl_offset; + Cursor loc; /* for showing .debug_loc */ + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n"); + TRACE_SYMTAB(" Offset Begin End Expression\n"); + init_Cursor( &loc, debug_loc_img, + debug_loc_sz, 0, barf, + "Overrun whilst reading .debug_loc section(1)" ); + dl_base = 0; + dl_offset = 0; + while (True) { + UWord w1, w2; + UWord len; + if (is_at_end_Cursor( &loc )) + break; + + /* Read a (host-)word pair. 
This is something of a hack since + the word size to read is really dictated by the ELF file; + however, we assume we're reading a file with the same + word-sizeness as the host. Reasonably enough. */ + w1 = get_UWord( &loc ); + w2 = get_UWord( &loc ); + + if (w1 == 0 && w2 == 0) { + /* end of list. reset 'base' */ + TRACE_D3(" %08lx <End of list>\n", dl_offset); + dl_base = 0; + dl_offset = get_position_of_Cursor( &loc ); + continue; + } + + if (w1 == -1UL) { + /* new value for 'base' */ + TRACE_D3(" %08lx %16lx %08lx (base address)\n", + dl_offset, w1, w2); + dl_base = w2; + continue; + } + + /* else a location expression follows */ + TRACE_D3(" %08lx %08lx %08lx ", + dl_offset, w1 + dl_base, w2 + dl_base); + len = (UWord)get_UShort( &loc ); + while (len > 0) { + UChar byte = get_UChar( &loc ); + TRACE_D3("%02x", (UInt)byte); + len--; + } + TRACE_SYMTAB("\n"); + } +#endif + + /* Display .debug_ranges */ + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n"); + TRACE_SYMTAB(" Offset Begin End\n"); + init_Cursor( &ranges, debug_ranges_img, + debug_ranges_sz, 0, barf, + "Overrun whilst reading .debug_ranges section(1)" ); + dr_base = 0; + dr_offset = 0; + while (True) { + UWord w1, w2; + + if (is_at_end_Cursor( &ranges )) + break; + + /* Read a (host-)word pair. This is something of a hack since + the word size to read is really dictated by the ELF file; + however, we assume we're reading a file with the same + word-sizeness as the host. Reasonably enough. */ + w1 = get_UWord( &ranges ); + w2 = get_UWord( &ranges ); + + if (w1 == 0 && w2 == 0) { + /* end of list. 
reset 'base' */ + TRACE_D3(" %08lx <End of list>\n", dr_offset); + dr_base = 0; + dr_offset = get_position_of_Cursor( &ranges ); + continue; + } + + if (w1 == -1UL) { + /* new value for 'base' */ + TRACE_D3(" %08lx %16lx %08lx (base address)\n", + dr_offset, w1, w2); + dr_base = w2; + continue; + } + + /* else a range [w1+base, w2+base) is denoted */ + TRACE_D3(" %08lx %08lx %08lx\n", + dr_offset, w1 + dr_base, w2 + dr_base); + } + + /* Display .debug_abbrev */ + init_Cursor( &abbv, debug_abbv_img, debug_abbv_sz, 0, barf, + "Overrun whilst reading .debug_abbrev section" ); + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n"); + while (True) { + if (is_at_end_Cursor( &abbv )) + break; + /* Read one abbreviation table */ + TRACE_D3(" Number TAG\n"); + while (True) { + ULong atag; + UInt has_children; + ULong acode = get_ULEB128( &abbv ); + if (acode == 0) break; /* end of the table */ + atag = get_ULEB128( &abbv ); + has_children = get_UChar( &abbv ); + TRACE_D3(" %llu %s [%s]\n", + acode, ML_(pp_DW_TAG)(atag), + ML_(pp_DW_children)(has_children)); + while (True) { + ULong at_name = get_ULEB128( &abbv ); + ULong at_form = get_ULEB128( &abbv ); + if (at_name == 0 && at_form == 0) break; + TRACE_D3(" %18s %s\n", + ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form)); + } + } + } + TRACE_SYMTAB("\n"); + + /* Now loop over the Compilation Units listed in the .debug_info + section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation + unit contains a Compilation Unit Header followed by precisely + one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */ + init_Cursor( &info, debug_info_img, debug_info_sz, 0, barf, + "Overrun whilst reading .debug_info section" ); + + /* We'll park the harvested type information in here. Also create + a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always + have at least one type entry to refer to. 
D3_FAKEVOID_CUOFF is + huge and presumably will not occur in any valid DWARF3 file -- + it would need to have a .debug_info section 4GB long for that to + happen. These type entries end up in the DebugInfo. */ + tyents = VG_(newXA)( ML_(dinfo_zalloc), + "di.readdwarf3.ndrw.1 (TyEnt temp array)", + ML_(dinfo_free), sizeof(TyEnt) ); + { TyEnt tyent; + VG_(memset)(&tyent, 0, sizeof(tyent)); + tyent.tag = Te_TyVoid; + tyent.cuOff = D3_FAKEVOID_CUOFF; + tyent.Te.TyVoid.isFake = True; + VG_(addToXA)( tyents, &tyent ); + } + { TyEnt tyent; + VG_(memset)(&tyent, 0, sizeof(tyent)); + tyent.tag = Te_UNKNOWN; + tyent.cuOff = D3_INVALID_CUOFF; + VG_(addToXA)( tyents, &tyent ); + } + + /* This is a tree used to unique-ify the range lists that are + manufactured by parse_var_DIE. References to the keys in the + tree wind up in .rngMany fields in TempVars. We'll need to + delete this tree, and the XArrays attached to it, at the end of + this function. */ + rangestree = VG_(newFM)( ML_(dinfo_zalloc), + "di.readdwarf3.ndrw.2 (rangestree)", + ML_(dinfo_free), + (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange ); + + /* List of variables we're accumulating. These don't end up in the + DebugInfo; instead their contents are handed to ML_(addVar) and + the list elements are then deleted. */ + tempvars = VG_(newXA)( ML_(dinfo_zalloc), + "di.readdwarf3.ndrw.3 (TempVar*s array)", + ML_(dinfo_free), + sizeof(TempVar*) ); + + /* List of GExprs we're accumulating. These wind up in the + DebugInfo. */ + gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4", + ML_(dinfo_free), sizeof(GExpr*) ); + + /* We need a D3TypeParser to keep track of partially constructed + types. It'll be discarded as soon as we've completed the CU, + since the resulting information is tipped in to 'tyents' as it + is generated. 
*/ + VG_(memset)( &typarser, 0, sizeof(typarser) ); + typarser.sp = -1; + typarser.language = '?'; + for (i = 0; i < N_D3_TYPE_STACK; i++) { + typarser.qparentE[i].tag = Te_EMPTY; + typarser.qparentE[i].cuOff = D3_INVALID_CUOFF; + } + + VG_(memset)( &varparser, 0, sizeof(varparser) ); + varparser.sp = -1; + + TRACE_D3("\n------ Parsing .debug_info section ------\n"); + while (True) { + UWord cu_start_offset, cu_offset_now; + CUConst cc; + /* It may be that the stated size of this CU is larger than the + amount of stuff actually in it. icc9 seems to generate CUs + thusly. We use these variables to figure out if this is + indeed the case, and if so how many bytes we need to skip to + get to the start of the next CU. Not skipping those bytes + causes us to misidentify the start of the next CU, and it all + goes badly wrong after that (not surprisingly). */ + UWord cu_size_including_IniLen, cu_amount_used; + + /* It seems icc9 finishes the DIE info before debug_info_sz + bytes have been used up. So be flexible, and declare the + sequence complete if there is not enough remaining bytes to + hold even the smallest conceivable CU header. (11 bytes I + reckon). */ + /* JRS 23Jan09: I suspect this is no longer necessary now that + the code below contains a 'while (cu_amount_used < + cu_size_including_IniLen ...' style loop, which skips over + any leftover bytes at the end of a CU in the case where the + CU's stated size is larger than its actual size (as + determined by reading all its DIEs). However, for prudence, + I'll leave the following test in place. I can't see that a + CU header can be smaller than 11 bytes, so I don't think + there's any harm possible through the test -- it just adds + robustness. */ + Word avail = get_remaining_length_Cursor( &info ); + if (avail < 11) { + if (avail > 0) + TRACE_D3("new_dwarf3_reader_wrk: warning: " + "%ld unused bytes after end of DIEs\n", avail); + break; + } + + /* Check the varparser's stack is in a sane state. 
*/ + vg_assert(varparser.sp == -1); + for (i = 0; i < N_D3_VAR_STACK; i++) { + vg_assert(varparser.ranges[i] == NULL); + vg_assert(varparser.level[i] == 0); + } + for (i = 0; i < N_D3_TYPE_STACK; i++) { + vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF); + vg_assert(typarser.qparentE[i].tag == Te_EMPTY); + vg_assert(typarser.qlevel[i] == 0); + } + + cu_start_offset = get_position_of_Cursor( &info ); + TRACE_D3("\n"); + TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset); + /* parse_CU_header initialises the CU's set_abbv_Cursor cache + (saC_cache) */ + parse_CU_Header( &cc, td3, &info, + (UChar*)debug_abbv_img, debug_abbv_sz ); + cc.debug_str_img = debug_str_img; + cc.debug_str_sz = debug_str_sz; + cc.debug_ranges_img = debug_ranges_img; + cc.debug_ranges_sz = debug_ranges_sz; + cc.debug_loc_img = debug_loc_img; + cc.debug_loc_sz = debug_loc_sz; + cc.debug_line_img = debug_line_img; + cc.debug_line_sz = debug_line_sz; + cc.debug_info_img = debug_info_img; + cc.debug_info_sz = debug_info_sz; + cc.cu_start_offset = cu_start_offset; + cc.di = di; + /* The CU's svma can be deduced by looking at the AT_low_pc + value in the top level TAG_compile_unit, which is the topmost + DIE. We'll leave it for the 'varparser' to acquire that info + and fill it in -- since it is the only party to want to know + it. */ + cc.cu_svma_known = False; + cc.cu_svma = 0; + + /* Create a fake outermost-level range covering the entire + address range. So we always have *something* to catch all + variable declarations. */ + varstack_push( &cc, &varparser, td3, + unitary_range_list(0UL, ~0UL), + -1, False/*isFunc*/, NULL/*fbGX*/ ); + + /* And set up the file name table. When we come across the top + level DIE for this CU (which is what the next call to + read_DIE should process) we will copy all the file names out + of the .debug_line img area and use this table to look up the + copies when we later see filename numbers in DW_TAG_variables + etc. 
*/ + vg_assert(!varparser.filenameTable ); + varparser.filenameTable + = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5", + ML_(dinfo_free), + sizeof(UChar*) ); + vg_assert(varparser.filenameTable); + + /* Now read the one-and-only top-level DIE for this CU. */ + vg_assert(varparser.sp == 0); + read_DIE( rangestree, + tyents, tempvars, gexprs, + &typarser, &varparser, + &info, td3, &cc, 0 ); + + cu_offset_now = get_position_of_Cursor( &info ); + + if (0) VG_(printf)("Travelled: %lu size %llu\n", + cu_offset_now - cc.cu_start_offset, + cc.unit_length + (cc.is_dw64 ? 12 : 4)); + + /* How big the CU claims it is .. */ + cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4); + /* .. vs how big we have found it to be */ + cu_amount_used = cu_offset_now - cc.cu_start_offset; + + if (1) TRACE_D3("offset now %ld, d-i-size %ld\n", + cu_offset_now, debug_info_sz); + if (cu_offset_now > debug_info_sz) + barf("toplevel DIEs beyond end of CU"); + + /* If the CU is bigger than it claims to be, we've got a serious + problem. */ + if (cu_amount_used > cu_size_including_IniLen) + barf("CU's actual size appears to be larger than it claims it is"); + + /* If the CU is smaller than it claims to be, we need to skip some + bytes. Loop updates cu_offset_new and cu_amount_used. */ + while (cu_amount_used < cu_size_including_IniLen + && get_remaining_length_Cursor( &info ) > 0) { + if (0) VG_(printf)("SKIP\n"); + (void)get_UChar( &info ); + cu_offset_now = get_position_of_Cursor( &info ); + cu_amount_used = cu_offset_now - cc.cu_start_offset; + } + + if (cu_offset_now == debug_info_sz) + break; + + /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur + anywhere else at all. Our fake the-entire-address-space + range is at level -1, so preening to -2 should completely + empty the stack out. */ + TRACE_D3("\n"); + varstack_preen( &varparser, td3, -2 ); + /* Similarly, empty the type stack out. 
*/ + typestack_preen( &typarser, td3, -2 ); + /* else keep going */ + + TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n", + cc.saC_cache_queries, cc.saC_cache_misses); + + vg_assert(varparser.filenameTable ); + VG_(deleteXA)( varparser.filenameTable ); + varparser.filenameTable = NULL; + } + + /* From here on we're post-processing the stuff we got + out of the .debug_info section. */ + if (td3) { + TRACE_D3("\n"); + ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array"); + TRACE_D3("\n"); + TRACE_D3("------ Compressing type entries ------\n"); + } + + tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6", + sizeof(TyEntIndexCache) ); + ML_(TyEntIndexCache__invalidate)( tyents_cache ); + dedup_types( td3, tyents, tyents_cache ); + if (td3) { + TRACE_D3("\n"); + ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression"); + } + + TRACE_D3("\n"); + TRACE_D3("------ Resolving the types of variables ------\n" ); + resolve_variable_types( barf, tyents, tyents_cache, tempvars ); + + /* Copy all the non-INDIR tyents into a new table. For large + .so's, about 90% of the tyents will by now have been resolved to + INDIRs, and we no longer need them, and so don't need to store + them. */ + tyents_to_keep + = VG_(newXA)( ML_(dinfo_zalloc), + "di.readdwarf3.ndrw.7 (TyEnt to-keep array)", + ML_(dinfo_free), sizeof(TyEnt) ); + n = VG_(sizeXA)( tyents ); + for (i = 0; i < n; i++) { + TyEnt* ent = VG_(indexXA)( tyents, i ); + if (ent->tag != Te_INDIR) + VG_(addToXA)( tyents_to_keep, ent ); + } + + VG_(deleteXA)( tyents ); + tyents = NULL; + ML_(dinfo_free)( tyents_cache ); + tyents_cache = NULL; + + /* Sort tyents_to_keep so we can lookup in it. A complete (if + minor) waste of time, since tyents itself is sorted, but + necessary since VG_(lookupXA) refuses to cooperate if we + don't. 
*/ + VG_(setCmpFnXA)( + tyents_to_keep, + (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only) + ); + VG_(sortXA)( tyents_to_keep ); + + /* Enable cacheing on tyents_to_keep */ + tyents_to_keep_cache + = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8", + sizeof(TyEntIndexCache) ); + ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache ); + + /* And record the tyents in the DebugInfo. We do this before + starting to hand variables to ML_(addVar), since if ML_(addVar) + wants to do debug printing (of the types of said vars) then it + will need the tyents.*/ + vg_assert(!di->admin_tyents); + di->admin_tyents = tyents_to_keep; + + /* Bias all the location expressions. */ + TRACE_D3("\n"); + TRACE_D3("------ Biasing the location expressions ------\n" ); + + n = VG_(sizeXA)( gexprs ); + for (i = 0; i < n; i++) { + gexpr = *(GExpr**)VG_(indexXA)( gexprs, i ); + bias_GX( gexpr, di ); + } + + TRACE_D3("\n"); + TRACE_D3("------ Acquired the following variables: ------\n\n"); + + /* Park (pointers to) all the vars in an XArray, so we can look up + abstract origins quickly. The array is sorted (hence, looked-up + by) the .dioff fields. Since the .dioffs should be in strictly + ascending order, there is no need to sort the array after + construction. The ascendingness is however asserted for. */ + dioff_lookup_tab + = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9", + ML_(dinfo_free), + sizeof(TempVar*) ); + vg_assert(dioff_lookup_tab); + + n = VG_(sizeXA)( tempvars ); + for (i = 0; i < n; i++) { + varp = *(TempVar**)VG_(indexXA)( tempvars, i ); + if (i > 0) { + varp2 = *(TempVar**)VG_(indexXA)( tempvars, i-1 ); + /* why should this hold? Only, I think, because we've + constructed the array by reading .debug_info sequentially, + and so the array .dioff fields should reflect that, and be + strictly ascending. 
*/ + vg_assert(varp2->dioff < varp->dioff); + } + VG_(addToXA)( dioff_lookup_tab, &varp ); + } + VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff ); + VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */ + + /* Now visit each var. Collect up as much info as possible for + each var and hand it to ML_(addVar). */ + n = VG_(sizeXA)( tempvars ); + for (j = 0; j < n; j++) { + TyEnt* ent; + varp = *(TempVar**)VG_(indexXA)( tempvars, j ); + + /* Possibly show .. */ + if (td3) { + VG_(printf)("<%lx> addVar: level %d: %s :: ", + varp->dioff, + varp->level, + varp->name ? varp->name : (UChar*)"<anon_var>" ); + if (varp->typeR) { + ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR ); + } else { + VG_(printf)("NULL"); + } + VG_(printf)("\n Loc="); + if (varp->gexpr) { + ML_(pp_GX)(varp->gexpr); + } else { + VG_(printf)("NULL"); + } + VG_(printf)("\n"); + if (varp->fbGX) { + VG_(printf)(" FrB="); + ML_(pp_GX)( varp->fbGX ); + VG_(printf)("\n"); + } else { + VG_(printf)(" FrB=none\n"); + } + VG_(printf)(" declared at: %s:%d\n", + varp->fName ? varp->fName : (UChar*)"NULL", + varp->fLine ); + if (varp->absOri != (UWord)D3_INVALID_CUOFF) + VG_(printf)(" abstract origin: <%lx>\n", varp->absOri); + } + + /* Skip variables which have no location. These must be + abstract instances; they are useless as-is since with no + location they have no specified memory location. They will + presumably be referred to via the absOri fields of other + variables. */ + if (!varp->gexpr) { + TRACE_D3(" SKIP (no location)\n\n"); + continue; + } + + /* So it has a location, at least. If it refers to some other + entry through its absOri field, pull in further info through + that. 
*/ + if (varp->absOri != (UWord)D3_INVALID_CUOFF) { + Bool found; + Word ixFirst, ixLast; + TempVar key; + TempVar* keyp = &key; + TempVar *varAI; + VG_(memset)(&key, 0, sizeof(key)); /* not necessary */ + key.dioff = varp->absOri; /* this is what we want to find */ + found = VG_(lookupXA)( dioff_lookup_tab, &keyp, + &ixFirst, &ixLast ); + if (!found) + barf("DW_AT_abstract_origin can't be resolved"); + /* If the following fails, there is more than one entry with + the same dioff. Which can't happen. */ + vg_assert(ixFirst == ixLast); + varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst ); + /* stay sane */ + vg_assert(varAI); + vg_assert(varAI->dioff == varp->absOri); + + /* Copy what useful info we can. */ + if (varAI->typeR && !varp->typeR) + varp->typeR = varAI->typeR; + if (varAI->name && !varp->name) + varp->name = varAI->name; + if (varAI->fName && !varp->fName) + varp->fName = varAI->fName; + if (varAI->fLine > 0 && varp->fLine == 0) + varp->fLine = varAI->fLine; + } + + /* Give it a name if it doesn't have one. */ + if (!varp->name) + varp->name = ML_(addStr)( di, "<anon_var>", -1 ); + + /* So now does it have enough info to be useful? */ + /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then + the type didn't get resolved. Really, in that case + something's broken earlier on, and should be fixed, rather + than just skipping the variable. */ + ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep, + tyents_to_keep_cache, + varp->typeR ); + /* The next two assertions should be guaranteed by + our previous call to resolve_variable_types. */ + vg_assert(ent); + vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN); + + if (ent->tag == Te_UNKNOWN) continue; + + vg_assert(varp->gexpr); + vg_assert(varp->name); + vg_assert(varp->typeR); + vg_assert(varp->level >= 0); + + /* Ok. So we're going to keep it. Call ML_(addVar) once for + each address range in which the variable exists. 
*/ + TRACE_D3(" ACQUIRE for range(s) "); + { AddrRange oneRange; + AddrRange* varPcRanges; + Word nVarPcRanges; + /* Set up to iterate over address ranges, however + represented. */ + if (varp->nRanges == 0 || varp->nRanges == 1) { + vg_assert(!varp->rngMany); + if (varp->nRanges == 0) { + vg_assert(varp->rngOneMin == 0); + vg_assert(varp->rngOneMax == 0); + } + nVarPcRanges = varp->nRanges; + oneRange.aMin = varp->rngOneMin; + oneRange.aMax = varp->rngOneMax; + varPcRanges = &oneRange; + } else { + vg_assert(varp->rngMany); + vg_assert(varp->rngOneMin == 0); + vg_assert(varp->rngOneMax == 0); + nVarPcRanges = VG_(sizeXA)(varp->rngMany); + vg_assert(nVarPcRanges >= 2); + vg_assert(nVarPcRanges == (Word)varp->nRanges); + varPcRanges = VG_(indexXA)(varp->rngMany, 0); + } + if (varp->level == 0) + vg_assert( nVarPcRanges == 1 ); + /* and iterate */ + for (i = 0; i < nVarPcRanges; i++) { + Addr pcMin = varPcRanges[i].aMin; + Addr pcMax = varPcRanges[i].aMax; + vg_assert(pcMin <= pcMax); + /* Level 0 is the global address range. So at level 0 we + don't want to bias pcMin/pcMax; but at all other levels + we do since those are derived from svmas in the Dwarf + we're reading. Be paranoid ... */ + if (varp->level == 0) { + vg_assert(pcMin == (Addr)0); + vg_assert(pcMax == ~(Addr)0); + } else { + /* vg_assert(pcMin > (Addr)0); + No .. we can legitimately expect to see ranges like + 0x0-0x11D (pre-biasing, of course). */ + vg_assert(pcMax < ~(Addr)0); + } + + /* Apply text biasing, for non-global variables. 
*/ + if (varp->level > 0) { + pcMin += di->text_debug_bias; + pcMax += di->text_debug_bias; + } + + if (i > 0 && (i%2) == 0) + TRACE_D3("\n "); + TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax ); + + ML_(addVar)( + di, varp->level, + pcMin, pcMax, + varp->name, varp->typeR, + varp->gexpr, varp->fbGX, + varp->fName, varp->fLine, td3 + ); + } + } + + TRACE_D3("\n\n"); + /* and move on to the next var */ + } + + /* Now free all the TempVars */ + n = VG_(sizeXA)( tempvars ); + for (i = 0; i < n; i++) { + varp = *(TempVar**)VG_(indexXA)( tempvars, i ); + ML_(dinfo_free)(varp); + } + VG_(deleteXA)( tempvars ); + tempvars = NULL; + + /* and the temp lookup table */ + VG_(deleteXA)( dioff_lookup_tab ); + + /* and the ranges tree. Note that we need to also free the XArrays + which constitute the keys, hence pass VG_(deleteXA) as a + key-finalizer. */ + VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL ); + + /* and the tyents_to_keep cache */ + ML_(dinfo_free)( tyents_to_keep_cache ); + tyents_to_keep_cache = NULL; + + /* and the file name table (just the array, not the entries + themselves). (Apparently, 2008-Oct-23, varparser.filenameTable + can be NULL here, for icc9 generated Dwarf3. Not sure what that + signifies (a deeper problem with the reader?)) */ + if (varparser.filenameTable) { + VG_(deleteXA)( varparser.filenameTable ); + varparser.filenameTable = NULL; + } + + /* record the GExprs in di so they can be freed later */ + vg_assert(!di->admin_gexprs); + di->admin_gexprs = gexprs; +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- The "new" DWARF3 reader -- top level control logic ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ +#include <setjmp.h> /* For jmp_buf */ +/* --- !!! --- EXTERNAL HEADERS end --- !!! 
--- */ + +static Bool d3rd_jmpbuf_valid = False; +static HChar* d3rd_jmpbuf_reason = NULL; +static jmp_buf d3rd_jmpbuf; + +static __attribute__((noreturn)) void barf ( HChar* reason ) { + vg_assert(d3rd_jmpbuf_valid); + d3rd_jmpbuf_reason = reason; + __builtin_longjmp(&d3rd_jmpbuf, 1); + /*NOTREACHED*/ + vg_assert(0); +} + + +void +ML_(new_dwarf3_reader) ( + struct _DebugInfo* di, + UChar* debug_info_img, SizeT debug_info_sz, + UChar* debug_abbv_img, SizeT debug_abbv_sz, + UChar* debug_line_img, SizeT debug_line_sz, + UChar* debug_str_img, SizeT debug_str_sz, + UChar* debug_ranges_img, SizeT debug_ranges_sz, + UChar* debug_loc_img, SizeT debug_loc_sz +) +{ + volatile Int jumped; + volatile Bool td3 = di->trace_symtab; + + /* Run the _wrk function to read the dwarf3. If it succeeds, it + just returns normally. If there is any failure, it longjmp's + back here, having first set d3rd_jmpbuf_reason to something + useful. */ + vg_assert(d3rd_jmpbuf_valid == False); + vg_assert(d3rd_jmpbuf_reason == NULL); + + d3rd_jmpbuf_valid = True; + jumped = __builtin_setjmp(&d3rd_jmpbuf); + if (jumped == 0) { + /* try this ... */ + new_dwarf3_reader_wrk( di, barf, + debug_info_img, debug_info_sz, + debug_abbv_img, debug_abbv_sz, + debug_line_img, debug_line_sz, + debug_str_img, debug_str_sz, + debug_ranges_img, debug_ranges_sz, + debug_loc_img, debug_loc_sz ); + d3rd_jmpbuf_valid = False; + TRACE_D3("\n------ .debug_info reading was successful ------\n"); + } else { + /* It longjmp'd. */ + d3rd_jmpbuf_valid = False; + /* Can't longjump without giving some sort of reason. */ + vg_assert(d3rd_jmpbuf_reason != NULL); + + TRACE_D3("\n------ .debug_info reading failed ------\n"); + + ML_(symerr)(di, True, d3rd_jmpbuf_reason); + } + + d3rd_jmpbuf_valid = False; + d3rd_jmpbuf_reason = NULL; +} + + + +/* --- Unused code fragments which might be useful one day. 
--- */ + +#if 0 + /* Read the arange tables */ + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n"); + init_Cursor( &aranges, debug_aranges_img, + debug_aranges_sz, 0, barf, + "Overrun whilst reading .debug_aranges section" ); + while (True) { + ULong len, d_i_offset; + Bool is64; + UShort version; + UChar asize, segsize; + + if (is_at_end_Cursor( &aranges )) + break; + /* Read one arange thingy */ + /* initial_length field */ + len = get_Initial_Length( &is64, &aranges, + "in .debug_aranges: invalid initial-length field" ); + version = get_UShort( &aranges ); + d_i_offset = get_Dwarfish_UWord( &aranges, is64 ); + asize = get_UChar( &aranges ); + segsize = get_UChar( &aranges ); + TRACE_D3(" Length: %llu\n", len); + TRACE_D3(" Version: %d\n", (Int)version); + TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset); + TRACE_D3(" Pointer Size: %d\n", (Int)asize); + TRACE_D3(" Segment Size: %d\n", (Int)segsize); + TRACE_D3("\n"); + TRACE_D3(" Address Length\n"); + + while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) { + (void)get_UChar( & aranges ); + } + while (True) { + ULong address = get_Dwarfish_UWord( &aranges, asize==8 ); + ULong length = get_Dwarfish_UWord( &aranges, asize==8 ); + TRACE_D3(" 0x%016llx 0x%llx\n", address, length); + if (address == 0 && length == 0) break; + } + } + TRACE_SYMTAB("\n"); +#endif + +/*--------------------------------------------------------------------*/ +/*--- end readdwarf3.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/readelf.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readelf.c.svn-base new file mode 100644 index 0000000..e0eef8c --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readelf.c.svn-base @@ -0,0 +1,2054 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reading of syms & debug info from ELF .so/executable files. 
---*/ +/*--- readelf.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. +*/ + +#include "pub_core_basics.h" +#include "pub_core_vki.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcfile.h" +#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ +#include "pub_core_machine.h" /* VG_ELF_CLASS */ +#include "pub_core_options.h" +#include "pub_core_oset.h" +#include "pub_core_tooliface.h" /* VG_(needs) */ +#include "pub_core_xarray.h" +#include "priv_misc.h" /* dinfo_zalloc/free/strdup */ +#include "priv_d3basics.h" +#include "priv_tytypes.h" +#include "priv_storage.h" +#include "priv_readelf.h" /* self */ +#include "priv_readdwarf.h" /* 'cos ELF contains DWARF */ +#include "priv_readdwarf3.h" +#include "priv_readstabs.h" /* and stabs, if we're unlucky */ + +/* --- !!! 
--- EXTERNAL HEADERS start --- !!! --- */ +#include <elf.h> +/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ + +/*------------------------------------------------------------*/ +/*--- 32/64-bit parameterisation ---*/ +/*------------------------------------------------------------*/ + +/* For all the ELF macros and types which specify '32' or '64', + select the correct variant for this platform and give it + an 'XX' name. Then use the 'XX' variant consistently in + the rest of this file. +*/ +#if VG_WORDSIZE == 4 +# define ElfXX_Ehdr Elf32_Ehdr +# define ElfXX_Shdr Elf32_Shdr +# define ElfXX_Phdr Elf32_Phdr +# define ElfXX_Sym Elf32_Sym +# define ElfXX_Word Elf32_Word +# define ElfXX_Addr Elf32_Addr +# define ElfXX_Dyn Elf32_Dyn +# define ELFXX_ST_BIND ELF32_ST_BIND +# define ELFXX_ST_TYPE ELF32_ST_TYPE + +#elif VG_WORDSIZE == 8 +# define ElfXX_Ehdr Elf64_Ehdr +# define ElfXX_Shdr Elf64_Shdr +# define ElfXX_Phdr Elf64_Phdr +# define ElfXX_Sym Elf64_Sym +# define ElfXX_Word Elf64_Word +# define ElfXX_Addr Elf64_Addr +# define ElfXX_Dyn Elf64_Dyn +# define ELFXX_ST_BIND ELF64_ST_BIND +# define ELFXX_ST_TYPE ELF64_ST_TYPE + +#else +# error "VG_WORDSIZE should be 4 or 8" +#endif + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Read symbol table and line info from ELF files. ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* readelf.c parses ELF files and acquires symbol table info from + them. It calls onwards to readdwarf.c to read DWARF2/3 line number + and call frame info found. */ + + +/* Identify an ELF object file by peering at the first few bytes of + it. 
*/ + +Bool ML_(is_elf_object_file)( void* image, SizeT n_image ) +{ + ElfXX_Ehdr* ehdr = (ElfXX_Ehdr*)image; + Int ok = 1; + + if (n_image < sizeof(ElfXX_Ehdr)) + return False; + + ok &= (ehdr->e_ident[EI_MAG0] == 0x7F + && ehdr->e_ident[EI_MAG1] == 'E' + && ehdr->e_ident[EI_MAG2] == 'L' + && ehdr->e_ident[EI_MAG3] == 'F'); + ok &= (ehdr->e_ident[EI_CLASS] == VG_ELF_CLASS + && ehdr->e_ident[EI_DATA] == VG_ELF_DATA2XXX + && ehdr->e_ident[EI_VERSION] == EV_CURRENT); + ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN); + ok &= (ehdr->e_machine == VG_ELF_MACHINE); + ok &= (ehdr->e_version == EV_CURRENT); + ok &= (ehdr->e_shstrndx != SHN_UNDEF); + ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0); + ok &= (ehdr->e_phoff != 0 && ehdr->e_phnum != 0); + + if (ok) + return True; + else + return False; +} + + +/* Show a raw ELF symbol, given its in-image address and name. */ + +static +void show_raw_elf_symbol ( Int i, + ElfXX_Sym* sym, Char* sym_name, Addr sym_svma, + Bool ppc64_linux_format ) +{ + HChar* space = ppc64_linux_format ? " " : ""; + VG_(printf)("raw symbol [%4d]: ", i); + switch (ELFXX_ST_BIND(sym->st_info)) { + case STB_LOCAL: VG_(printf)("LOC "); break; + case STB_GLOBAL: VG_(printf)("GLO "); break; + case STB_WEAK: VG_(printf)("WEA "); break; + case STB_LOPROC: VG_(printf)("lop "); break; + case STB_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + switch (ELFXX_ST_TYPE(sym->st_info)) { + case STT_NOTYPE: VG_(printf)("NOT "); break; + case STT_OBJECT: VG_(printf)("OBJ "); break; + case STT_FUNC: VG_(printf)("FUN "); break; + case STT_SECTION: VG_(printf)("SEC "); break; + case STT_FILE: VG_(printf)("FIL "); break; + case STT_LOPROC: VG_(printf)("lop "); break; + case STT_HIPROC: VG_(printf)("hip "); break; + default: VG_(printf)("??? "); break; + } + VG_(printf)(": svma %#010lx, %ssz %4ld %s\n", + sym_svma, space, sym->st_size + 0UL, + ( sym->st_name ? 
sym_name : (Char*)"NONAME" ) ); +} + + +/* Decide whether SYM is something we should collect, and if so, copy + relevant info to the _OUT arguments. For {x86,amd64,ppc32}-linux + this is straightforward - the name, address, size are copied out + unchanged. + + There is a bit of a kludge re data symbols (see KLUDGED BSS CHECK + below): we assume that the .bss is mapped immediately after .data, + and so accept any data symbol which exists in the range [start of + .data, size of .data + size of .bss). I don't know if this is + really correct/justifiable, or not. + + For ppc64-linux it's more complex. If the symbol is seen to be in + the .opd section, it is taken to be a function descriptor, and so + a dereference is attempted, in order to get hold of the real entry + point address. Also as part of the dereference, there is an attempt + to calculate the TOC pointer (R2 value) associated with the symbol. + + To support the ppc64-linux pre-"dotless" ABI (prior to gcc 4.0.0), + if the symbol is seen to be outside the .opd section and its name + starts with a dot, an .opd deference is not attempted, and no TOC + pointer is calculated, but the the leading dot is removed from the + name. + + As a result, on ppc64-linux, the caller of this function may have + to piece together the real size, address, name of the symbol from + multiple calls to this function. Ugly and confusing. 
+*/ +static +Bool get_elf_symbol_info ( + /* INPUTS */ + struct _DebugInfo* di, /* containing DebugInfo */ + ElfXX_Sym* sym, /* ELF symbol */ + Char* sym_name, /* name */ + Addr sym_svma, /* address as stated in the object file */ + Bool symtab_in_debug, /* symbol table is in the debug file */ + UChar* opd_img, /* oimage of .opd sec (ppc64-linux only) */ + PtrdiffT opd_bias, /* for biasing AVMAs found in .opd */ + /* OUTPUTS */ + Char** sym_name_out, /* name we should record */ + Addr* sym_avma_out, /* addr we should record */ + Int* sym_size_out, /* symbol size */ + Addr* sym_tocptr_out, /* ppc64-linux only: R2 value to be + used on entry */ + Bool* from_opd_out, /* ppc64-linux only: did we deref an + .opd entry? */ + Bool* is_text_out /* is this a text symbol? */ + ) +{ + Bool plausible; +# if defined(VGP_ppc64_linux) + Bool is_in_opd; +# endif + Bool in_text, in_data, in_sdata, in_rodata, in_bss, in_sbss; + Addr text_svma, data_svma, sdata_svma, rodata_svma, bss_svma, sbss_svma; + PtrdiffT text_bias, data_bias, sdata_bias, rodata_bias, bss_bias, sbss_bias; + + /* Set defaults */ + *sym_name_out = sym_name; + *sym_avma_out = sym_svma; /* we will bias this shortly */ + *is_text_out = True; + *sym_size_out = (Int)sym->st_size; + *sym_tocptr_out = 0; /* unknown/inapplicable */ + *from_opd_out = False; + + /* Figure out if we're interested in the symbol. Firstly, is it of + the right flavour? */ + plausible + = (ELFXX_ST_BIND(sym->st_info) == STB_GLOBAL + || ELFXX_ST_BIND(sym->st_info) == STB_LOCAL + || ELFXX_ST_BIND(sym->st_info) == STB_WEAK + ) + && + (ELFXX_ST_TYPE(sym->st_info) == STT_FUNC + || ELFXX_ST_TYPE(sym->st_info) == STT_OBJECT + ); + + /* Work out the svma and bias for each section as it will appear in + addresses in the symbol table. 
*/ + if (symtab_in_debug) { + text_svma = di->text_debug_svma; + text_bias = di->text_debug_bias; + data_svma = di->data_debug_svma; + data_bias = di->data_debug_bias; + sdata_svma = di->sdata_debug_svma; + sdata_bias = di->sdata_debug_bias; + rodata_svma = di->rodata_debug_svma; + rodata_bias = di->rodata_debug_bias; + bss_svma = di->bss_debug_svma; + bss_bias = di->bss_debug_bias; + sbss_svma = di->sbss_debug_svma; + sbss_bias = di->sbss_debug_bias; + } else { + text_svma = di->text_svma; + text_bias = di->text_bias; + data_svma = di->data_svma; + data_bias = di->data_bias; + sdata_svma = di->sdata_svma; + sdata_bias = di->sdata_bias; + rodata_svma = di->rodata_svma; + rodata_bias = di->rodata_bias; + bss_svma = di->bss_svma; + bss_bias = di->bss_bias; + sbss_svma = di->sbss_svma; + sbss_bias = di->sbss_bias; + } + + /* Now bias sym_avma_out accordingly by figuring out exactly which + section the symbol is from and bias accordingly. Screws up if + the previously deduced section svma address ranges are wrong. 
*/ + if (di->text_present + && di->text_size > 0 + && sym_svma >= text_svma + && sym_svma < text_svma + di->text_size) { + *is_text_out = True; + *sym_avma_out += text_bias; + } else + if (di->data_present + && di->data_size > 0 + && sym_svma >= data_svma + && sym_svma < data_svma + di->data_size) { + *is_text_out = False; + *sym_avma_out += data_bias; + } else + if (di->sdata_present + && di->sdata_size > 0 + && sym_svma >= sdata_svma + && sym_svma < sdata_svma + di->sdata_size) { + *is_text_out = False; + *sym_avma_out += sdata_bias; + } else + if (di->rodata_present + && di->rodata_size > 0 + && sym_svma >= rodata_svma + && sym_svma < rodata_svma + di->rodata_size) { + *is_text_out = False; + *sym_avma_out += rodata_bias; + } else + if (di->bss_present + && di->bss_size > 0 + && sym_svma >= bss_svma + && sym_svma < bss_svma + di->bss_size) { + *is_text_out = False; + *sym_avma_out += bss_bias; + } else + if (di->sbss_present + && di->sbss_size > 0 + && sym_svma >= sbss_svma + && sym_svma < sbss_svma + di->sbss_size) { + *is_text_out = False; + *sym_avma_out += sbss_bias; + } else { + /* Assume it's in .text. Is this a good idea? */ + *is_text_out = True; + *sym_avma_out += text_bias; + } + +# if defined(VGP_ppc64_linux) + /* Allow STT_NOTYPE in the very special case where we're running on + ppc64-linux and the symbol is one which the .opd-chasing hack + below will chase. */ + if (!plausible + && *is_text_out + && ELFXX_ST_TYPE(sym->st_info) == STT_NOTYPE + && sym->st_size > 0 + && di->opd_present + && di->opd_size > 0 + && *sym_avma_out >= di->opd_avma + && *sym_avma_out < di->opd_avma + di->opd_size) + plausible = True; +# endif + + if (!plausible) + return False; + + /* Ignore if nameless, or zero-sized. */ + if (sym->st_name == (ElfXX_Word)0 + || /* VG_(strlen)(sym_name) == 0 */ + /* equivalent but cheaper ... 
*/ + sym_name[0] == 0 + || sym->st_size == 0) { + TRACE_SYMTAB(" ignore -- size=0: %s\n", sym_name); + return False; + } + + /* This seems to significantly reduce the number of junk + symbols, and particularly reduces the number of + overlapping address ranges. Don't ask me why ... */ + if ((Int)sym->st_value == 0) { + TRACE_SYMTAB( " ignore -- valu=0: %s\n", sym_name); + return False; + } + + /* If it's apparently in a GOT or PLT, it's really a reference to a + symbol defined elsewhere, so ignore it. */ + if (di->got_present + && di->got_size > 0 + && *sym_avma_out >= di->got_avma + && *sym_avma_out < di->got_avma + di->got_size) { + TRACE_SYMTAB(" ignore -- in GOT: %s\n", sym_name); + return False; + } + if (di->plt_present + && di->plt_size > 0 + && *sym_avma_out >= di->plt_avma + && *sym_avma_out < di->plt_avma + di->plt_size) { + TRACE_SYMTAB(" ignore -- in PLT: %s\n", sym_name); + return False; + } + + /* ppc64-linux nasty hack: if the symbol is in an .opd section, + then really what we have is the address of a function + descriptor. So use the first word of that as the function's + text. + + See thread starting at + http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00557.html + */ +# if defined(VGP_ppc64_linux) + is_in_opd = False; +# endif + + if (di->opd_present + && di->opd_size > 0 + && *sym_avma_out >= di->opd_avma + && *sym_avma_out < di->opd_avma + di->opd_size) { +# if !defined(VGP_ppc64_linux) + TRACE_SYMTAB(" ignore -- in OPD: %s\n", sym_name); + return False; +# else + Int offset_in_opd; + ULong* fn_descr; + Bool details = 1||False; + + if (details) + TRACE_SYMTAB("opdXXX: opd_bias %p, sym_svma_out %p\n", + (void*)(opd_bias), (void*)*sym_avma_out); + + if (!VG_IS_8_ALIGNED(*sym_avma_out)) { + TRACE_SYMTAB(" ignore -- not 8-aligned: %s\n", sym_name); + return False; + } + + /* *sym_avma_out is a vma pointing into the .opd section. We + know the vma of the opd section start, so we can figure out + how far into the opd section this is. 
*/ + + offset_in_opd = (Addr)(*sym_avma_out) - (Addr)(di->opd_avma); + if (offset_in_opd < 0 || offset_in_opd >= di->opd_size) { + TRACE_SYMTAB(" ignore -- invalid OPD offset: %s\n", sym_name); + return False; + } + + /* Now we want to know what's at that offset in the .opd + section. We can't look in the running image since it won't + necessarily have been mapped. But we can consult the oimage. + opd_img is the start address of the .opd in the oimage. + Hence: */ + + fn_descr = (ULong*)(opd_img + offset_in_opd); + + if (details) + TRACE_SYMTAB("opdXXY: offset %d, fn_descr %p\n", + offset_in_opd, fn_descr); + if (details) + TRACE_SYMTAB("opdXXZ: *fn_descr %p\n", (void*)(fn_descr[0])); + + /* opd_bias is the what we have to add to SVMAs found in .opd to + get plausible .text AVMAs for the entry point, and .data + AVMAs (presumably) for the TOC locations. We use the caller + supplied value (which is di->text_bias) for both of these. + Not sure why that is correct - it seems to work, and sounds + OK for fn_descr[0], but surely we need to use the data bias + and not the text bias for fn_descr[1] ? Oh Well. + */ + *sym_avma_out = fn_descr[0] + opd_bias; + *sym_tocptr_out = fn_descr[1] + opd_bias; + *from_opd_out = True; + is_in_opd = True; + + /* Do a final sanity check: if the symbol falls outside the + DebugInfo's mapped range, ignore it. Since *sym_avma_out has + been updated, that can be achieved simply by falling through + to the test below. */ + +# endif /* ppc64-linux nasty hack */ + } + + /* Here's yet another ppc64-linux hack. Get rid of leading dot if + the symbol is outside .opd. */ +# if defined(VGP_ppc64_linux) + if (di->opd_size > 0 + && !is_in_opd + && sym_name[0] == '.') { + vg_assert(!(*from_opd_out)); + *sym_name_out = &sym_name[1]; + } +# endif + + /* If no part of the symbol falls within the mapped range, + ignore it. 
*/ + + in_text + = di->text_present + && di->text_size > 0 + && !(*sym_avma_out + *sym_size_out <= di->text_avma + || *sym_avma_out >= di->text_avma + di->text_size); + + in_data + = di->data_present + && di->data_size > 0 + && !(*sym_avma_out + *sym_size_out <= di->data_avma + || *sym_avma_out >= di->data_avma + di->data_size); + + in_sdata + = di->sdata_present + && di->sdata_size > 0 + && !(*sym_avma_out + *sym_size_out <= di->sdata_avma + || *sym_avma_out >= di->sdata_avma + di->sdata_size); + + in_rodata + = di->rodata_present + && di->rodata_size > 0 + && !(*sym_avma_out + *sym_size_out <= di->rodata_avma + || *sym_avma_out >= di->rodata_avma + di->rodata_size); + + in_bss + = di->bss_present + && di->bss_size > 0 + && !(*sym_avma_out + *sym_size_out <= di->bss_avma + || *sym_avma_out >= di->bss_avma + di->bss_size); + + in_sbss + = di->sbss_present + && di->sbss_size > 0 + && !(*sym_avma_out + *sym_size_out <= di->sbss_avma + || *sym_avma_out >= di->sbss_avma + di->sbss_size); + + + if (*is_text_out) { + /* This used to reject any symbol falling outside the text + segment ("if (!in_text) ..."). Now it is relaxed slightly, + to reject only symbols which fall outside the area mapped + r-x. This is in accordance with r7427. See + "Comment_Regarding_Text_Range_Checks" in storage.c for + background. */ + Bool in_rx; + vg_assert(di->have_rx_map); + in_rx = (!(*sym_avma_out + *sym_size_out <= di->rx_map_avma + || *sym_avma_out >= di->rx_map_avma + di->rx_map_size)); + if (in_text) + vg_assert(in_rx); + if (!in_rx) { + TRACE_SYMTAB( + "ignore -- %#lx .. %#lx outside .text svma range %#lx .. %#lx\n", + *sym_avma_out, *sym_avma_out + *sym_size_out, + di->text_avma, + di->text_avma + di->text_size); + return False; + } + } else { + if (!(in_data || in_sdata || in_rodata || in_bss || in_sbss)) { + TRACE_SYMTAB( + "ignore -- %#lx .. 
%#lx outside .data / .sdata / .rodata / .bss / .sbss svma ranges\n", + *sym_avma_out, *sym_avma_out + *sym_size_out); + return False; + } + } + +# if defined(VGP_ppc64_linux) + /* It's crucial that we never add symbol addresses in the .opd + section. This would completely mess up function redirection and + intercepting. This assert ensures that anysymbols that make it + into the symbol table on ppc64-linux don't point into .opd. */ + if (di->opd_present && di->opd_size > 0) { + vg_assert(*sym_avma_out + *sym_size_out <= di->opd_avma + || *sym_avma_out >= di->opd_avma + di->opd_size); + } +# endif + + /* Acquire! */ + return True; +} + + +/* Read an ELF symbol table (normal or dynamic). This one is for the + "normal" case ({x86,amd64,ppc32}-linux). */ +static +__attribute__((unused)) /* not referred to on all targets */ +void read_elf_symtab__normal( + struct _DebugInfo* di, UChar* tab_name, + ElfXX_Sym* symtab_img, SizeT symtab_szB, + UChar* strtab_img, SizeT strtab_szB, + Bool symtab_in_debug, + UChar* opd_img /* ppc64-linux only */ + ) +{ + Word i; + Addr sym_svma, sym_avma_really; + Char *sym_name, *sym_name_really; + Int sym_size; + Addr sym_tocptr; + Bool from_opd, is_text; + DiSym risym; + ElfXX_Sym *sym; + + if (strtab_img == NULL || symtab_img == NULL) { + Char buf[80]; + vg_assert(VG_(strlen)(tab_name) < 40); + VG_(sprintf)(buf, " object doesn't have a %s", tab_name); + ML_(symerr)(di, False, buf); + return; + } + + TRACE_SYMTAB("\n--- Reading (ELF, standard) %s (%ld entries) ---\n", + tab_name, symtab_szB/sizeof(ElfXX_Sym) ); + + /* Perhaps should start at i = 1; ELF docs suggest that entry + 0 always denotes 'unknown symbol'. 
*/ + for (i = 1; i < (Word)(symtab_szB/sizeof(ElfXX_Sym)); i++) { + sym = & symtab_img[i]; + sym_name = (UChar*)(strtab_img + sym->st_name); + sym_svma = sym->st_value; + + if (di->trace_symtab) + show_raw_elf_symbol(i, sym, sym_name, sym_svma, False); + + if (get_elf_symbol_info(di, sym, sym_name, sym_svma, + symtab_in_debug, + opd_img, di->text_bias, + &sym_name_really, + &sym_avma_really, + &sym_size, + &sym_tocptr, + &from_opd, &is_text)) { + + risym.addr = sym_avma_really; + risym.size = sym_size; + risym.name = ML_(addStr) ( di, sym_name_really, -1 ); + risym.tocptr = sym_tocptr; + risym.isText = is_text; + vg_assert(risym.name != NULL); + vg_assert(risym.tocptr == 0); /* has no role except on ppc64-linux */ + ML_(addSym) ( di, &risym ); + + if (di->trace_symtab) { + VG_(printf)(" rec(%c) [%4ld]: " + " val %#010lx, sz %4d %s\n", + is_text ? 't' : 'd', + i, + risym.addr, + (Int)risym.size, + (HChar*)risym.name + ); + } + + } + } +} + + +/* Read an ELF symbol table (normal or dynamic). This one is for + ppc64-linux, which requires special treatment. 
*/ + +typedef + struct { + Addr addr; + UChar* name; + } + TempSymKey; + +typedef + struct { + TempSymKey key; + Addr tocptr; + Int size; + Bool from_opd; + Bool is_text; + } + TempSym; + +static Word cmp_TempSymKey ( TempSymKey* key1, TempSym* elem2 ) { + if (key1->addr < elem2->key.addr) return -1; + if (key1->addr > elem2->key.addr) return 1; + return (Word)VG_(strcmp)(key1->name, elem2->key.name); +} + +static +__attribute__((unused)) /* not referred to on all targets */ +void read_elf_symtab__ppc64_linux( + struct _DebugInfo* di, UChar* tab_name, + ElfXX_Sym* symtab_img, SizeT symtab_szB, + UChar* strtab_img, SizeT strtab_szB, + Bool symtab_in_debug, + UChar* opd_img /* ppc64-linux only */ + ) +{ + Word i; + Int old_size; + Addr sym_svma, sym_avma_really; + Char *sym_name, *sym_name_really; + Int sym_size; + Addr sym_tocptr; + Bool from_opd, modify_size, modify_tocptr, is_text; + DiSym risym; + ElfXX_Sym *sym; + OSet *oset; + TempSymKey key; + TempSym *elem; + TempSym *prev; + + if (strtab_img == NULL || symtab_img == NULL) { + Char buf[80]; + vg_assert(VG_(strlen)(tab_name) < 40); + VG_(sprintf)(buf, " object doesn't have a %s", tab_name); + ML_(symerr)(di, False, buf); + return; + } + + TRACE_SYMTAB("\n--- Reading (ELF, ppc64-linux) %s (%ld entries) ---\n", + tab_name, symtab_szB/sizeof(ElfXX_Sym) ); + + oset = VG_(OSetGen_Create)( offsetof(TempSym,key), + (OSetCmp_t)cmp_TempSymKey, + ML_(dinfo_zalloc), "di.respl.1", + ML_(dinfo_free) ); + vg_assert(oset); + + /* Perhaps should start at i = 1; ELF docs suggest that entry + 0 always denotes 'unknown symbol'. 
*/ + for (i = 1; i < (Word)(symtab_szB/sizeof(ElfXX_Sym)); i++) { + sym = & symtab_img[i]; + sym_name = (Char*)(strtab_img + sym->st_name); + sym_svma = sym->st_value; + + if (di->trace_symtab) + show_raw_elf_symbol(i, sym, sym_name, sym_svma, True); + + if (get_elf_symbol_info(di, sym, sym_name, sym_svma, + symtab_in_debug, + opd_img, di->text_bias, + &sym_name_really, + &sym_avma_really, + &sym_size, + &sym_tocptr, + &from_opd, &is_text)) { + + /* Check if we've seen this (name,addr) key before. */ + key.addr = sym_avma_really; + key.name = sym_name_really; + prev = VG_(OSetGen_Lookup)( oset, &key ); + + if (prev) { + + /* Seen it before. Fold in whatever new info we can. */ + modify_size = False; + modify_tocptr = False; + old_size = 0; + + if (prev->from_opd && !from_opd + && (prev->size == 24 || prev->size == 16) + && sym_size != prev->size) { + /* Existing one is an opd-redirect, with a bogus size, + so the only useful new fact we have is the real size + of the symbol. */ + modify_size = True; + old_size = prev->size; + prev->size = sym_size; + } + else + if (!prev->from_opd && from_opd + && (sym_size == 24 || sym_size == 16)) { + /* Existing one is non-opd, new one is opd. What we + can acquire from the new one is the TOC ptr to be + used. Since the existing sym is non-toc, it + shouldn't currently have an known TOC ptr. */ + vg_assert(prev->tocptr == 0); + modify_tocptr = True; + prev->tocptr = sym_tocptr; + } + else { + /* ignore. can we do better here? 
*/ + } + + /* Only one or the other is possible (I think) */ + vg_assert(!(modify_size && modify_tocptr)); + + if (modify_size && di->trace_symtab) { + VG_(printf)(" modify (old sz %4d) " + " val %#010lx, toc %#010lx, sz %4d %s\n", + old_size, + prev->key.addr, + prev->tocptr, + (Int) prev->size, + (HChar*)prev->key.name + ); + } + if (modify_tocptr && di->trace_symtab) { + VG_(printf)(" modify (upd tocptr) " + " val %#010lx, toc %#010lx, sz %4d %s\n", + prev->key.addr, + prev->tocptr, + (Int) prev->size, + (HChar*)prev->key.name + ); + } + + } else { + + /* A new (name,addr) key. Add and continue. */ + elem = VG_(OSetGen_AllocNode)(oset, sizeof(TempSym)); + vg_assert(elem); + elem->key = key; + elem->tocptr = sym_tocptr; + elem->size = sym_size; + elem->from_opd = from_opd; + elem->is_text = is_text; + VG_(OSetGen_Insert)(oset, elem); + if (di->trace_symtab) { + VG_(printf)(" to-oset [%4ld]: " + " val %#010lx, toc %#010lx, sz %4d %s\n", + i, + elem->key.addr, + elem->tocptr, + (Int) elem->size, + (HChar*)elem->key.name + ); + } + + } + } + } + + /* All the syms that matter are in the oset. Now pull them out, + build a "standard" symbol table, and nuke the oset. */ + + i = 0; + VG_(OSetGen_ResetIter)( oset ); + + while ( (elem = VG_(OSetGen_Next)(oset)) ) { + risym.addr = elem->key.addr; + risym.size = elem->size; + risym.name = ML_(addStr) ( di, elem->key.name, -1 ); + risym.tocptr = elem->tocptr; + risym.isText = elem->is_text; + vg_assert(risym.name != NULL); + + ML_(addSym) ( di, &risym ); + if (di->trace_symtab) { + VG_(printf)(" rec(%c) [%4ld]: " + " val %#010lx, toc %#010lx, sz %4d %s\n", + risym.isText ? 't' : 'd', + i, + risym.addr, + risym.tocptr, + (Int) risym.size, + (HChar*)risym.name + ); + } + i++; + } + + VG_(OSetGen_Destroy)( oset ); +} + + +/* + * This routine for calculating the CRC for a separate debug file + * is GPLed code borrowed from GNU binutils. 
+ */ +static UInt +calc_gnu_debuglink_crc32(UInt crc, const UChar *buf, Int len) +{ + static const UInt crc32_table[256] = + { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 
0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d + }; + const UChar *end; + + crc = ~crc & 0xffffffff; + for (end = buf + len; buf < end; ++ buf) + crc = crc32_table[(crc ^ *buf) & 0xff] ^ (crc >> 8); + return ~crc & 0xffffffff;; +} + +/* + * Try and open a separate debug file, ignoring any where the CRC does + * not match the value from the main object file. 
+ */ +static +Addr open_debug_file( Char* name, UInt crc, /*OUT*/UWord* size ) +{ + SysRes fd, sres; + struct vg_stat stat_buf; + UInt calccrc; + + fd = VG_(open)(name, VKI_O_RDONLY, 0); + if (fd.isError) + return 0; + + if (VG_(fstat)(fd.res, &stat_buf) != 0) { + VG_(close)(fd.res); + return 0; + } + + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, "Reading debug info from %s ..", name); + + *size = stat_buf.st_size; + + sres = VG_(am_mmap_file_float_valgrind) + ( *size, VKI_PROT_READ, fd.res, 0 ); + + VG_(close)(fd.res); + + if (sres.isError) + return 0; + + calccrc = calc_gnu_debuglink_crc32(0, (UChar*)sres.res, *size); + if (calccrc != crc) { + SysRes res = VG_(am_munmap_valgrind)(sres.res, *size); + vg_assert(!res.isError); + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, + ".. CRC mismatch (computed %08x wanted %08x)", calccrc, crc); + return 0; + } + + return sres.res; +} + +/* + * Try to find a separate debug file for a given object file. + */ +static +Addr find_debug_file( struct _DebugInfo* di, + Char* objpath, Char* debugname, + UInt crc, /*OUT*/UWord* size ) +{ + Char *objdir = ML_(dinfo_strdup)("di.fdf.1", objpath); + Char *objdirptr; + Char *debugpath; + Addr addr = 0; + + if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL) + *objdirptr = '\0'; + + debugpath = ML_(dinfo_zalloc)( + "di.fdf.2", + VG_(strlen)(objdir) + VG_(strlen)(debugname) + 32); + + VG_(sprintf)(debugpath, "%s/%s", objdir, debugname); + + if ((addr = open_debug_file(debugpath, crc, size)) == 0) { + VG_(sprintf)(debugpath, "%s/.debug/%s", objdir, debugname); + if ((addr = open_debug_file(debugpath, crc, size)) == 0) { + VG_(sprintf)(debugpath, "/usr/lib/debug%s/%s", objdir, debugname); + addr = open_debug_file(debugpath, crc, size); + } + } + + if (addr) { + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ Found a debuginfo file: %s\n", debugpath); + } + + ML_(dinfo_free)(debugpath); + ML_(dinfo_free)(objdir); + + return addr; +} + + +static Bool contained_within ( Addr 
outer, UWord n_outer, + Addr inner, UWord n_inner ) +{ + if (n_outer == 0 || n_inner == 0) + return False; + /* Simplistic .. assumes no wraparound (reasonably enough) */ + if (inner >= outer && inner+n_inner <= outer+n_outer) + return True; + return False; +} + +static void* INDEX_BIS ( void* base, Word idx, Word scale ) { + return (void*)( ((UChar*)base) + idx * scale ); +} + + +/* Find the file offset corresponding to SVMA by using the program + headers. This is taken from binutils-2.17/binutils/readelf.c + offset_from_vma(). */ +static +Word file_offset_from_svma ( /*OUT*/Bool* ok, + Addr svma, + ElfXX_Phdr* phdr_img, + Word phdr_nent, + Word phdr_ent_szB ) +{ + Word i; + ElfXX_Phdr* seg; + for (i = 0; i < phdr_nent; i++) { + seg = INDEX_BIS( phdr_img, i, phdr_ent_szB ); + if (seg->p_type != PT_LOAD) + continue; + if (svma >= (seg->p_vaddr & -seg->p_align) + && svma + 1 <= seg->p_vaddr + seg->p_filesz) { + *ok = True; + return svma - seg->p_vaddr + seg->p_offset; + } + } + *ok = False; + return 0; +} + +/* The central function for reading ELF debug info. For the + object/exe specified by the DebugInfo, find ELF sections, then read + the symbols, line number info, file name info, CFA (stack-unwind + info) and anything else we want, into the tables within the + supplied DebugInfo. +*/ +Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) +{ + Bool res, ok; + SysRes fd, sres; + Word i; + + /* Image addresses for the ELF file we're working with. */ + Addr oimage = 0; + UWord n_oimage = 0; + + /* Ditto for any ELF debuginfo file that we might happen to load. */ + Addr dimage = 0; + UWord n_dimage = 0; + + /* ELF header for the main file. Should == oimage since is at + start of file. */ + ElfXX_Ehdr* ehdr_img = NULL; + + /* Program header table image addr, # entries, entry size */ + ElfXX_Phdr* phdr_img = NULL; + UWord phdr_nent = 0; + UWord phdr_ent_szB = 0; + + /* Section header image addr, # entries, entry size. Also the + associated string table. 
*/ + ElfXX_Shdr* shdr_img = NULL; + UWord shdr_nent = 0; + UWord shdr_ent_szB = 0; + UChar* shdr_strtab_img = NULL; + + /* SVMAs covered by rx and rw segments and corresponding bias. */ + Addr rx_svma_base = 0; + Addr rx_svma_limit = 0; + PtrdiffT rx_bias = 0; + Addr rw_svma_base = 0; + Addr rw_svma_limit = 0; + PtrdiffT rw_bias = 0; + + vg_assert(di); + vg_assert(di->have_rx_map == True); + vg_assert(di->have_rw_map == True); + vg_assert(di->rx_map_size > 0); + vg_assert(di->rw_map_size > 0); + vg_assert(di->have_dinfo == False); + vg_assert(di->filename); + vg_assert(!di->memname); + vg_assert(!di->symtab); + vg_assert(!di->loctab); + vg_assert(!di->cfsi); + vg_assert(!di->cfsi_exprs); + vg_assert(!di->strchunks); + vg_assert(!di->soname); + + /* If these don't hold true, it means that m_syswrap/m_aspacemgr + managed to do a mapping where the start isn't page aligned. + Which sounds pretty bogus to me. */ + vg_assert(VG_IS_PAGE_ALIGNED(di->rx_map_avma)); + vg_assert(VG_IS_PAGE_ALIGNED(di->rw_map_avma)); + + /* ---------------------------------------------------------- + At this point, there is very little information in the + DebugInfo. We only know that something that looks like an ELF + file has been mapped rx-ishly as recorded with the di->*rx_map* + fields and has also been mapped rw-ishly as recorded with the + di->*rw_map* fields. First we examine the file's ELF Program + Header, and, by comparing that against the di->*r{w,x}_map* + info, try to figure out the AVMAs for the sections we care + about, that should have been mapped: text, data, sdata, bss got, + plt, and toc. + ---------------------------------------------------------- */ + + res = False; + + oimage = (Addr)NULL; + if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) + VG_(message)(Vg_DebugMsg, "Reading syms from %s (%#lx)", + di->filename, di->rx_map_avma ); + + /* mmap the object image aboard, so that we can read symbols and + line number info out of it. 
It will be munmapped immediately + thereafter; it is only aboard transiently. */ + + fd = VG_(open)(di->filename, VKI_O_RDONLY, 0); + if (fd.isError) { + ML_(symerr)(di, True, "Can't open .so/.exe to read symbols?!"); + return False; + } + + { Long n_oimageLL = VG_(fsize)(fd.res); + if (n_oimageLL <= 0) { + ML_(symerr)(di, True, "Can't stat .so/.exe (to determine its size)?!"); + VG_(close)(fd.res); + return False; + } + n_oimage = (UWord)(ULong)n_oimageLL; + } + + sres = VG_(am_mmap_file_float_valgrind) + ( n_oimage, VKI_PROT_READ, fd.res, 0 ); + + VG_(close)(fd.res); + + if (sres.isError) { + VG_(message)(Vg_UserMsg, "warning: mmap failed on %s", di->filename ); + VG_(message)(Vg_UserMsg, " no symbols or debug info loaded" ); + return False; + } + + oimage = sres.res; + /* Check against wraparound. am_mmap_file_float_valgrind should + not produce a wrapped-around mapping. */ + vg_assert(n_oimage > 0); + vg_assert(oimage + n_oimage > oimage); + + if (0) { + VG_(printf)("read_elf_debug_info: OIMAGE = %p - %p\n", + (void*)oimage, (void*)(oimage + (UWord)n_oimage)); + } + + /* Ok, the object image is safely in oimage[0 .. n_oimage-1]. Now + verify that it is a valid ELF .so or executable image. */ + res = False; + ok = (n_oimage >= sizeof(ElfXX_Ehdr)); + ehdr_img = (ElfXX_Ehdr*)oimage; + + if (ok) + ok &= ML_(is_elf_object_file)(ehdr_img, n_oimage); + + if (!ok) { + ML_(symerr)(di, True, "Invalid ELF Header"); + goto out; + } + + /* Find where the program and section header tables are, and give + up if either is missing or outside the image (bogus). 
*/ + phdr_img = (ElfXX_Phdr*)( ((UChar*)ehdr_img) + ehdr_img->e_phoff ); + phdr_nent = ehdr_img->e_phnum; + phdr_ent_szB = ehdr_img->e_phentsize; + + shdr_img = (ElfXX_Shdr*)( ((UChar*)ehdr_img) + ehdr_img->e_shoff ); + shdr_nent = ehdr_img->e_shnum; + shdr_ent_szB = ehdr_img->e_shentsize; + + TRACE_SYMTAB("------ Basic facts about the object ------\n"); + TRACE_SYMTAB("object: img %p n_oimage %ld\n", + (void*)oimage, n_oimage); + TRACE_SYMTAB("phdr: img %p nent %ld ent_szB %ld\n", + phdr_img, phdr_nent, phdr_ent_szB); + TRACE_SYMTAB("shdr: img %p nent %ld ent_szB %ld\n", + shdr_img, shdr_nent, shdr_ent_szB); + + if (phdr_nent == 0 + || !contained_within( + oimage, n_oimage, + (Addr)phdr_img, phdr_nent * phdr_ent_szB)) { + ML_(symerr)(di, True, "Missing or invalid ELF Program Header Table"); + goto out; + } + + if (shdr_nent == 0 + || !contained_within( + oimage, n_oimage, + (Addr)shdr_img, shdr_nent * shdr_ent_szB)) { + ML_(symerr)(di, True, "Missing or invalid ELF Section Header Table"); + goto out; + } + + /* Also find the section header's string table, and validate. */ + /* checked previously by is_elf_object_file: */ + vg_assert( ehdr_img->e_shstrndx != SHN_UNDEF ); + + shdr_strtab_img + = (UChar*)( ((UChar*)ehdr_img) + + shdr_img[ehdr_img->e_shstrndx].sh_offset); + if (!contained_within( oimage, n_oimage, + (Addr)shdr_strtab_img, + 1/*bogus, but we don't know the real size*/ )) { + ML_(symerr)(di, True, "Invalid ELF Section Header String Table"); + goto out; + } + + TRACE_SYMTAB("shdr: string table at %p\n", shdr_strtab_img ); + + /* Do another amazingly tedious thing: find out the .soname for + this object. Apparently requires looking through the program + header table. 
*/ + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ Looking for the soname ------\n"); + vg_assert(di->soname == NULL); + { + ElfXX_Addr prev_svma = 0; + + for (i = 0; i < phdr_nent; i++) { + ElfXX_Phdr* phdr = INDEX_BIS( phdr_img, i, phdr_ent_szB ); + + /* Make sure the PT_LOADable entries are in order */ + if (phdr->p_type == PT_LOAD) { + TRACE_SYMTAB("PT_LOAD in order?: %#lx %#lx\n", + prev_svma + 0UL, + phdr->p_vaddr + 0UL); + if (phdr->p_vaddr < prev_svma) { + ML_(symerr)(di, True, + "ELF Program Headers are not in ascending order"); + goto out; + } + prev_svma = phdr->p_vaddr; + if (rx_svma_limit == 0 + && phdr->p_offset >= di->rx_map_foff + && phdr->p_offset < di->rx_map_foff + di->rx_map_size + && phdr->p_offset + phdr->p_filesz <= di->rx_map_foff + di->rx_map_size) { + rx_svma_base = phdr->p_vaddr; + rx_svma_limit = phdr->p_vaddr + phdr->p_memsz; + rx_bias = di->rx_map_avma - di->rx_map_foff + phdr->p_offset - phdr->p_vaddr; + } + else if (rw_svma_limit == 0 + && phdr->p_offset >= di->rw_map_foff + && phdr->p_offset < di->rw_map_foff + di->rw_map_size + && phdr->p_offset + phdr->p_filesz <= di->rw_map_foff + di->rw_map_size) { + rw_svma_base = phdr->p_vaddr; + rw_svma_limit = phdr->p_vaddr + phdr->p_memsz; + rw_bias = di->rw_map_avma - di->rw_map_foff + phdr->p_offset - phdr->p_vaddr; + } + } + + /* Try to get the soname. If there isn't one, use "NONE". + The seginfo needs to have some kind of soname in order to + facilitate writing redirect functions, since all redirect + specifications require a soname (pattern). 
*/ + if (phdr->p_type == PT_DYNAMIC && di->soname == NULL) { + ElfXX_Dyn* dyn_img = (ElfXX_Dyn*)( ((UChar*)ehdr_img) + + phdr->p_offset); + Word stroff = -1; + UChar* strtab = NULL; + Word j; + for (j = 0; dyn_img[j].d_tag != DT_NULL; j++) { + switch (dyn_img[j].d_tag) { + case DT_SONAME: { + stroff = dyn_img[j].d_un.d_val; + break; + } + case DT_STRTAB: { + Bool ok2 = False; + Word offset = file_offset_from_svma( + &ok2, + dyn_img[j].d_un.d_ptr, + phdr_img, + phdr_nent, phdr_ent_szB + ); + if (ok2 && strtab == NULL) { + vg_assert(offset >= 0 && offset <= n_oimage); + strtab = ((UChar*)ehdr_img) + offset; + } + break; + } + default: + break; + } + } + if (stroff != -1 && strtab != NULL) { + TRACE_SYMTAB("Found soname = %s\n", strtab+stroff); + di->soname = ML_(dinfo_strdup)("di.redi.1", strtab+stroff); + } + } + } /* for (i = 0; i < phdr_nent; i++) ... */ + } /* look for the soname */ + + /* If, after looking at all the program headers, we still didn't + find a soname, add a fake one. */ + if (di->soname == NULL) { + TRACE_SYMTAB("No soname found; using (fake) \"NONE\"\n"); + di->soname = "NONE"; + } + + vg_assert(rx_svma_limit != 0); + vg_assert(rw_svma_limit != 0); + + /* Now read the section table. */ + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ Examining the section headers " + "and program headers ------\n"); + TRACE_SYMTAB("rx: at %#lx are mapped foffsets %ld .. %ld\n", + di->rx_map_avma, + di->rx_map_foff, di->rx_map_foff + di->rx_map_size - 1 ); + TRACE_SYMTAB("rx: contains svmas %#lx .. %#lx with bias %#lx\n", + rx_svma_base, rx_svma_limit - 1, rx_bias ); + TRACE_SYMTAB("rw: at %#lx are mapped foffsets %ld .. %ld\n", + di->rw_map_avma, + di->rw_map_foff, di->rw_map_foff + di->rw_map_size - 1 ); + TRACE_SYMTAB("rw: contains svmas %#lx .. 
%#lx with bias %#lx\n", + rw_svma_base, rw_svma_limit - 1, rw_bias ); + + for (i = 0; i < shdr_nent; i++) { + ElfXX_Shdr* shdr = INDEX_BIS( shdr_img, i, shdr_ent_szB ); + UChar* name = shdr_strtab_img + shdr->sh_name; + Addr svma = shdr->sh_addr; + OffT foff = shdr->sh_offset; + UWord size = shdr->sh_size; + UInt alyn = shdr->sh_addralign; + Bool bits = !(shdr->sh_type == SHT_NOBITS); + Bool inrx = svma >= rx_svma_base && svma < rx_svma_limit; + Bool inrw = svma >= rw_svma_base && svma < rw_svma_limit; + + TRACE_SYMTAB(" [sec %2ld] %s %s al%2u foff %6ld .. %6ld " + " svma %p name \"%s\"\n", + i, inrx ? "rx" : " ", inrw ? "rw" : " ", alyn, + foff, foff+size-1, (void*)svma, name ); + + /* Check for sane-sized segments. SHT_NOBITS sections have zero + size in the file. */ + if ((foff >= n_oimage) || (foff + (bits ? size : 0) > n_oimage)) { + ML_(symerr)(di, True, "ELF Section extends beyond image end"); + goto out; + } + + /* Check for a sane alignment value. */ + if (alyn > 0 && -1 == VG_(log2)(alyn)) { + ML_(symerr)(di, True, "ELF Section contains invalid " + ".sh_addralign value"); + goto out; + } + +# define BAD(_secname) \ + do { ML_(symerr)(di, True, \ + "Can't make sense of " _secname \ + " section mapping"); \ + goto out; \ + } while (0) + + /* Find avma-s for: .text .data .sdata .rodata .bss .sbss .plt .got .opd + and .eh_frame */ + + /* Accept .text where mapped as rx (code), even if zero-sized */ + if (0 == VG_(strcmp)(name, ".text")) { + if (inrx && size >= 0 && !di->text_present) { + di->text_present = True; + di->text_svma = svma; + di->text_avma = svma + rx_bias; + di->text_size = size; + di->text_bias = rx_bias; + di->text_debug_svma = svma; + di->text_debug_bias = rx_bias; + TRACE_SYMTAB("acquiring .text svma = %#lx .. %#lx\n", + di->text_svma, + di->text_svma + di->text_size - 1); + TRACE_SYMTAB("acquiring .text avma = %#lx .. 
%#lx\n", + di->text_avma, + di->text_avma + di->text_size - 1); + TRACE_SYMTAB("acquiring .text bias = %#lx\n", di->text_bias); + } else { + BAD(".text"); + } + } + + /* Accept .data where mapped as rw (data), even if zero-sized */ + if (0 == VG_(strcmp)(name, ".data")) { + if (inrw && size >= 0 && !di->data_present) { + di->data_present = True; + di->data_svma = svma; + di->data_avma = svma + rw_bias; + di->data_size = size; + di->data_bias = rw_bias; + di->data_debug_svma = svma; + di->data_debug_bias = rw_bias; + TRACE_SYMTAB("acquiring .data svma = %#lx .. %#lx\n", + di->data_svma, + di->data_svma + di->data_size - 1); + TRACE_SYMTAB("acquiring .data avma = %#lx .. %#lx\n", + di->data_avma, + di->data_avma + di->data_size - 1); + TRACE_SYMTAB("acquiring .data bias = %#lx\n", di->data_bias); + } else { + BAD(".data"); + } + } + + /* Accept .sdata where mapped as rw (data) */ + if (0 == VG_(strcmp)(name, ".sdata")) { + if (inrw && size > 0 && !di->sdata_present) { + di->sdata_present = True; + di->sdata_svma = svma; + di->sdata_avma = svma + rw_bias; + di->sdata_size = size; + di->sdata_bias = rw_bias; + di->sdata_debug_svma = svma; + di->sdata_debug_bias = rw_bias; + TRACE_SYMTAB("acquiring .sdata svma = %#lx .. %#lx\n", + di->sdata_svma, + di->sdata_svma + di->sdata_size - 1); + TRACE_SYMTAB("acquiring .sdata avma = %#lx .. %#lx\n", + di->sdata_avma, + di->sdata_avma + di->sdata_size - 1); + TRACE_SYMTAB("acquiring .sdata bias = %#lx\n", di->sdata_bias); + } else { + BAD(".sdata"); + } + } + + /* Accept .rodata where mapped as rx (data), even if zero-sized */ + if (0 == VG_(strcmp)(name, ".rodata")) { + if (inrx && size >= 0 && !di->rodata_present) { + di->rodata_present = True; + di->rodata_svma = svma; + di->rodata_avma = svma + rx_bias; + di->rodata_size = size; + di->rodata_bias = rx_bias; + di->rodata_debug_svma = svma; + di->rodata_debug_bias = rw_bias; + TRACE_SYMTAB("acquiring .rodata svma = %#lx .. 
%#lx\n", + di->rodata_svma, + di->rodata_svma + di->rodata_size - 1); + TRACE_SYMTAB("acquiring .rodata avma = %#lx .. %#lx\n", + di->rodata_avma, + di->rodata_avma + di->rodata_size - 1); + TRACE_SYMTAB("acquiring .rodata bias = %#lx\n", di->rodata_bias); + } else { + BAD(".rodata"); + } + } + + /* Accept .bss where mapped as rw (data), even if zero-sized */ + if (0 == VG_(strcmp)(name, ".bss")) { + if (inrw && size >= 0 && !di->bss_present) { + di->bss_present = True; + di->bss_svma = svma; + di->bss_avma = svma + rw_bias; + di->bss_size = size; + di->bss_bias = rw_bias; + di->bss_debug_svma = svma; + di->bss_debug_bias = rw_bias; + TRACE_SYMTAB("acquiring .bss svma = %#lx .. %#lx\n", + di->bss_svma, + di->bss_svma + di->bss_size - 1); + TRACE_SYMTAB("acquiring .bss avma = %#lx .. %#lx\n", + di->bss_avma, + di->bss_avma + di->bss_size - 1); + TRACE_SYMTAB("acquiring .bss bias = %#lx\n", di->bss_bias); + } else + + /* Now one from the wtf?! department ... */ + if (inrx && (!inrw) && size >= 0 && !di->bss_present) { + /* File contains a .bss, but it got mapped as rx only. + This is very strange. For now, just pretend we didn't + see it :-) */ + di->bss_present = False; + di->bss_svma = 0; + di->bss_avma = 0; + di->bss_size = 0; + di->bss_bias = 0; + di->bss_debug_svma = 0; + di->bss_debug_bias = 0; + if (!VG_(clo_xml)) { + VG_(message)(Vg_UserMsg, "Warning: the following file's .bss is " + "mapped r-x only - ignoring .bss syms"); + VG_(message)(Vg_UserMsg, " %s", di->filename + ? di->filename + : (UChar*)"(null?!)" ); + } + } else + + if ((!inrw) && (!inrx) && size >= 0 && !di->bss_present) { + /* File contains a .bss, but it didn't get mapped. Ignore. 
*/ + di->bss_present = False; + di->bss_svma = 0; + di->bss_avma = 0; + di->bss_size = 0; + di->bss_bias = 0; + } else { + BAD(".bss"); + } + } + + /* Accept .sbss where mapped as rw (data) */ + if (0 == VG_(strcmp)(name, ".sbss")) { + if (inrw && size > 0 && !di->sbss_present) { + di->sbss_present = True; + di->sbss_svma = svma; + di->sbss_avma = svma + rw_bias; + di->sbss_size = size; + di->sbss_bias = rw_bias; + di->sbss_debug_svma = svma; + di->sbss_debug_bias = rw_bias; + TRACE_SYMTAB("acquiring .sbss svma = %#lx .. %#lx\n", + di->sbss_svma, + di->sbss_svma + di->sbss_size - 1); + TRACE_SYMTAB("acquiring .sbss avma = %#lx .. %#lx\n", + di->sbss_avma, + di->sbss_avma + di->sbss_size - 1); + TRACE_SYMTAB("acquiring .sbss bias = %#lx\n", di->sbss_bias); + } else { + BAD(".sbss"); + } + } + + /* Accept .got where mapped as rw (data) */ + if (0 == VG_(strcmp)(name, ".got")) { + if (inrw && size > 0 && !di->got_present) { + di->got_present = True; + di->got_avma = svma + rw_bias; + di->got_size = size; + TRACE_SYMTAB("acquiring .got avma = %#lx\n", di->got_avma); + } else { + BAD(".got"); + } + } + + /* Accept .got.plt where mapped as rw (data) */ + if (0 == VG_(strcmp)(name, ".got.plt")) { + if (inrw && size > 0 && !di->gotplt_present) { + di->gotplt_present = True; + di->gotplt_avma = svma + rw_bias; + di->gotplt_size = size; + TRACE_SYMTAB("acquiring .got.plt avma = %#lx\n", di->gotplt_avma); + } else if (size != 0) { + BAD(".got.plt"); + } + } + + /* PLT is different on different platforms, it seems. 
*/ +# if defined(VGP_x86_linux) || defined(VGP_amd64_linux) + /* Accept .plt where mapped as rx (code) */ + if (0 == VG_(strcmp)(name, ".plt")) { + if (inrx && size > 0 && !di->plt_present) { + di->plt_present = True; + di->plt_avma = svma + rx_bias; + di->plt_size = size; + TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma); + } else { + BAD(".plt"); + } + } +# elif defined(VGP_ppc32_linux) + /* Accept .plt where mapped as rw (data) */ + if (0 == VG_(strcmp)(name, ".plt")) { + if (inrw && size > 0 && !di->plt_present) { + di->plt_present = True; + di->plt_avma = svma + rw_bias; + di->plt_size = size; + TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma); + } else { + BAD(".plt"); + } + } +# elif defined(VGP_ppc64_linux) + /* Accept .plt where mapped as rw (data), or unmapped */ + if (0 == VG_(strcmp)(name, ".plt")) { + if (inrw && size > 0 && !di->plt_present) { + di->plt_present = True; + di->plt_avma = svma + rw_bias; + di->plt_size = size; + TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma); + } else + if ((!inrw) && (!inrx) && size > 0 && !di->plt_present) { + /* File contains a .plt, but it didn't get mapped. + Presumably it is not required on this platform. At + least don't reject the situation as invalid. */ + di->plt_present = True; + di->plt_avma = 0; + di->plt_size = 0; + } else { + BAD(".plt"); + } + } +# else +# error "Unsupported platform" +# endif + + /* Accept .opd where mapped as rw (data) */ + if (0 == VG_(strcmp)(name, ".opd")) { + if (inrw && size > 0 && !di->opd_present) { + di->opd_present = True; + di->opd_avma = svma + rw_bias; + di->opd_size = size; + TRACE_SYMTAB("acquiring .opd avma = %#lx\n", di->opd_avma); + } else { + BAD(".opd"); + } + } + + /* Accept .eh_frame where mapped as rx (code). This seems to be + the common case. However, if that doesn't pan out, try for + rw (data) instead. 
*/ + if (0 == VG_(strcmp)(name, ".eh_frame")) { + if (inrx && size > 0 && !di->ehframe_present) { + di->ehframe_present = True; + di->ehframe_avma = svma + rx_bias; + di->ehframe_size = size; + TRACE_SYMTAB("acquiring .eh_frame avma = %#lx\n", di->ehframe_avma); + } else + if (inrw && size > 0 && !di->ehframe_present) { + di->ehframe_present = True; + di->ehframe_avma = svma + rw_bias; + di->ehframe_size = size; + TRACE_SYMTAB("acquiring .eh_frame avma = %#lx\n", di->ehframe_avma); + } else { + BAD(".eh_frame"); + } + } + +# undef BAD + + } + + if (0) VG_(printf)("YYYY text_: avma %#lx size %ld bias %#lx\n", + di->text_avma, di->text_size, di->text_bias); + + if (VG_(clo_verbosity) > 2 || VG_(clo_trace_redir)) + VG_(message)(Vg_DebugMsg, " svma %#010lx, avma %#010lx", + di->text_avma - di->text_bias, + di->text_avma ); + + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ Finding image addresses " + "for debug-info sections ------\n"); + + /* Find interesting sections, read the symbol table(s), read any debug + information */ + { + /* IMAGE addresses: pointers to start of sections in the + transiently loaded oimage, not in the fragments of the file + mapped in by the guest's dynamic linker. 
*/ + UChar* strtab_img = NULL; /* .strtab */ + ElfXX_Sym* symtab_img = NULL; /* .symtab */ + UChar* dynstr_img = NULL; /* .dynstr */ + ElfXX_Sym* dynsym_img = NULL; /* .dynsym */ + UChar* debuglink_img = NULL; /* .gnu_debuglink */ + UChar* stab_img = NULL; /* .stab (stabs) */ + UChar* stabstr_img = NULL; /* .stabstr (stabs) */ + UChar* debug_line_img = NULL; /* .debug_line (dwarf2) */ + UChar* debug_info_img = NULL; /* .debug_info (dwarf2) */ + UChar* debug_abbv_img = NULL; /* .debug_abbrev (dwarf2) */ + UChar* debug_str_img = NULL; /* .debug_str (dwarf2) */ + UChar* debug_ranges_img = NULL; /* .debug_ranges (dwarf2) */ + UChar* debug_loc_img = NULL; /* .debug_loc (dwarf2) */ + UChar* dwarf1d_img = NULL; /* .debug (dwarf1) */ + UChar* dwarf1l_img = NULL; /* .line (dwarf1) */ + UChar* ehframe_img = NULL; /* .eh_frame (dwarf2) */ + UChar* opd_img = NULL; /* .opd (dwarf2, + ppc64-linux) */ + /* Section sizes, in bytes */ + SizeT strtab_sz = 0; + SizeT symtab_sz = 0; + SizeT dynstr_sz = 0; + SizeT dynsym_sz = 0; + SizeT debuglink_sz = 0; + SizeT stab_sz = 0; + SizeT stabstr_sz = 0; + SizeT debug_line_sz = 0; + SizeT debug_info_sz = 0; + SizeT debug_abbv_sz = 0; + SizeT debug_str_sz = 0; + SizeT debug_ranges_sz = 0; + SizeT debug_loc_sz = 0; + SizeT dwarf1d_sz = 0; + SizeT dwarf1l_sz = 0; + SizeT ehframe_sz = 0; + SizeT opd_sz_unused = 0; + + /* Find all interesting sections */ + + /* What FIND does: it finds the section called SEC_NAME. The + size of it is assigned to SEC_SIZE. The address of the + section in the transiently loaded oimage is assigned to + SEC_FILEA. Even for sections which are marked loadable, the + client's ld.so may not have loaded them yet, so there is no + guarantee that we can safely prod around in any such area). + Because the entire object file is transiently mapped aboard + for inspection, it's always safe to inspect that area. 
*/ + + for (i = 0; i < ehdr_img->e_shnum; i++) { + +# define FIND(sec_name, sec_size, sec_img) \ + do { ElfXX_Shdr* shdr \ + = INDEX_BIS( shdr_img, i, shdr_ent_szB ); \ + if (0 == VG_(strcmp)(sec_name, shdr_strtab_img \ + + shdr->sh_name)) { \ + Bool nobits; \ + sec_img = (void*)(oimage + shdr->sh_offset); \ + sec_size = shdr->sh_size; \ + nobits = shdr->sh_type == SHT_NOBITS; \ + TRACE_SYMTAB( "%18s: img %p .. %p\n", \ + sec_name, (UChar*)sec_img, \ + ((UChar*)sec_img) + sec_size - 1); \ + /* SHT_NOBITS sections have zero size in the file. */ \ + if ( shdr->sh_offset \ + + (nobits ? 0 : sec_size) > n_oimage ) { \ + ML_(symerr)(di, True, \ + " section beyond image end?!"); \ + goto out; \ + } \ + } \ + } while (0); + + /* NAME SIZE IMAGE addr */ + FIND(".dynsym", dynsym_sz, dynsym_img) + FIND(".dynstr", dynstr_sz, dynstr_img) + FIND(".symtab", symtab_sz, symtab_img) + FIND(".strtab", strtab_sz, strtab_img) + + FIND(".gnu_debuglink", debuglink_sz, debuglink_img) + + FIND(".stab", stab_sz, stab_img) + FIND(".stabstr", stabstr_sz, stabstr_img) + + FIND(".debug_line", debug_line_sz, debug_line_img) + FIND(".debug_info", debug_info_sz, debug_info_img) + FIND(".debug_abbrev", debug_abbv_sz, debug_abbv_img) + FIND(".debug_str", debug_str_sz, debug_str_img) + FIND(".debug_ranges", debug_ranges_sz, debug_ranges_img) + FIND(".debug_loc", debug_loc_sz, debug_loc_img) + + FIND(".debug", dwarf1d_sz, dwarf1d_img) + FIND(".line", dwarf1l_sz, dwarf1l_img) + FIND(".eh_frame", ehframe_sz, ehframe_img) + + FIND(".opd", opd_sz_unused, opd_img) + +# undef FIND + } + + /* Did we find a debuglink section? 
*/ + if (debuglink_img != NULL) { + UInt crc_offset = VG_ROUNDUP(VG_(strlen)(debuglink_img)+1, 4); + UInt crc; + + vg_assert(crc_offset + sizeof(UInt) <= debuglink_sz); + + /* Extract the CRC from the debuglink section */ + crc = *(UInt *)(debuglink_img + crc_offset); + + /* See if we can find a matching debug file */ + dimage = find_debug_file( di, di->filename, debuglink_img, + crc, &n_dimage ); + + if (dimage != 0 + && n_dimage >= sizeof(ElfXX_Ehdr) + && ML_(is_elf_object_file)((void*)dimage, n_dimage)) { + + /* Pull out and validate program header and section header info */ + ElfXX_Ehdr* ehdr_dimg = (ElfXX_Ehdr*)dimage; + ElfXX_Phdr* phdr_dimg = (ElfXX_Phdr*)( ((UChar*)ehdr_dimg) + + ehdr_dimg->e_phoff ); + UWord phdr_dnent = ehdr_dimg->e_phnum; + UWord phdr_dent_szB = ehdr_dimg->e_phentsize; + ElfXX_Shdr* shdr_dimg = (ElfXX_Shdr*)( ((UChar*)ehdr_dimg) + + ehdr_dimg->e_shoff ); + UWord shdr_dnent = ehdr_dimg->e_shnum; + UWord shdr_dent_szB = ehdr_dimg->e_shentsize; + UChar* shdr_strtab_dimg = NULL; + + /* SVMAs covered by rx and rw segments and corresponding bias. */ + Addr rx_dsvma_base = 0; + Addr rx_dsvma_limit = 0; + PtrdiffT rx_dbias = 0; + Addr rw_dsvma_base = 0; + Addr rw_dsvma_limit = 0; + PtrdiffT rw_dbias = 0; + + Bool need_symtab, need_stabs, need_dwarf2, need_dwarf1; + + if (phdr_dnent == 0 + || !contained_within( + dimage, n_dimage, + (Addr)phdr_dimg, phdr_dnent * phdr_dent_szB)) { + ML_(symerr)(di, True, + "Missing or invalid ELF Program Header Table" + " (debuginfo file)"); + goto out; + } + + if (shdr_dnent == 0 + || !contained_within( + dimage, n_dimage, + (Addr)shdr_dimg, shdr_dnent * shdr_dent_szB)) { + ML_(symerr)(di, True, + "Missing or invalid ELF Section Header Table" + " (debuginfo file)"); + goto out; + } + + /* Also find the section header's string table, and validate. 
*/ + /* checked previously by is_elf_object_file: */ + vg_assert( ehdr_dimg->e_shstrndx != SHN_UNDEF ); + + shdr_strtab_dimg + = (UChar*)( ((UChar*)ehdr_dimg) + + shdr_dimg[ehdr_dimg->e_shstrndx].sh_offset); + if (!contained_within( + dimage, n_dimage, + (Addr)shdr_strtab_dimg, + 1/*bogus, but we don't know the real size*/ )) { + ML_(symerr)(di, True, + "Invalid ELF Section Header String Table" + " (debuginfo file)"); + goto out; + } + + need_symtab = (NULL == symtab_img); + need_stabs = (NULL == stab_img); + need_dwarf2 = (NULL == debug_info_img); + need_dwarf1 = (NULL == dwarf1d_img); + + for (i = 0; i < ehdr_dimg->e_phnum; i++) { + ElfXX_Phdr* phdr + = INDEX_BIS( (void*)(dimage + ehdr_dimg->e_phoff), + i, phdr_ent_szB ); + if (phdr->p_type == PT_LOAD) { + if (rx_dsvma_limit == 0 + && phdr->p_offset >= di->rx_map_foff + && phdr->p_offset < di->rx_map_foff + di->rx_map_size + && phdr->p_offset + phdr->p_filesz <= di->rx_map_foff + di->rx_map_size) { + rx_dsvma_base = phdr->p_vaddr; + rx_dsvma_limit = phdr->p_vaddr + phdr->p_memsz; + rx_dbias = di->rx_map_avma - di->rx_map_foff + phdr->p_offset - phdr->p_vaddr; + } + else if (rw_dsvma_limit == 0 + && phdr->p_offset >= di->rw_map_foff + && phdr->p_offset < di->rw_map_foff + di->rw_map_size + && phdr->p_offset + phdr->p_filesz <= di->rw_map_foff + di->rw_map_size) { + rw_dsvma_base = phdr->p_vaddr; + rw_dsvma_limit = phdr->p_vaddr + phdr->p_memsz; + rw_dbias = di->rw_map_avma - di->rw_map_foff + phdr->p_offset - phdr->p_vaddr; + } + } + } + + /* Find all interesting sections */ + for (i = 0; i < ehdr_dimg->e_shnum; i++) { + + /* Find debug svma and bias information for sections + we found in the main file. */ + +# define FIND(sec, seg) \ + do { ElfXX_Shdr* shdr \ + = INDEX_BIS( shdr_dimg, i, shdr_dent_szB ); \ + if (di->sec##_present \ + && 0 == VG_(strcmp)("." 
#sec, \ + shdr_strtab_dimg + shdr->sh_name)) { \ + vg_assert(di->sec##_size == shdr->sh_size); \ + vg_assert(di->sec##_avma + shdr->sh_addr + seg##_dbias); \ + di->sec##_debug_svma = shdr->sh_addr; \ + di->sec##_debug_bias = seg##_dbias; \ + TRACE_SYMTAB("acquiring ." #sec " debug svma = %#lx .. %#lx\n", \ + di->sec##_debug_svma, \ + di->sec##_debug_svma + di->sec##_size - 1); \ + TRACE_SYMTAB("acquiring ." #sec " debug bias = %#lx\n", \ + di->sec##_debug_bias); \ + } \ + } while (0); + + /* SECTION SEGMENT */ + FIND(text, rx) + FIND(data, rw) + FIND(sdata, rw) + FIND(rodata, rw) + FIND(bss, rw) + FIND(sbss, rw) + +# undef FIND + + /* Same deal as previous FIND, except only do it for those + sections for which we didn't find anything useful in + the main file. */ + +# define FIND(condition, sec_name, sec_size, sec_img) \ + do { ElfXX_Shdr* shdr \ + = INDEX_BIS( shdr_dimg, i, shdr_dent_szB ); \ + if (condition \ + && 0 == VG_(strcmp)(sec_name, \ + shdr_strtab_dimg + shdr->sh_name)) { \ + Bool nobits; \ + if (0 != sec_img) \ + VG_(core_panic)("repeated section!\n"); \ + sec_img = (void*)(dimage + shdr->sh_offset); \ + sec_size = shdr->sh_size; \ + nobits = shdr->sh_type == SHT_NOBITS; \ + TRACE_SYMTAB( "%18s: dimg %p .. %p\n", \ + sec_name, \ + (UChar*)sec_img, \ + ((UChar*)sec_img) + sec_size - 1); \ + /* SHT_NOBITS sections have zero size in the file. */ \ + if ( shdr->sh_offset \ + + (nobits ? 0 : sec_size) > n_dimage ) { \ + ML_(symerr)(di, True, \ + " section beyond image end?!"); \ + goto out; \ + } \ + } \ + } while (0); + + /* NEEDED? 
NAME SIZE IMAGE addr */ + FIND(need_symtab, ".symtab", symtab_sz, symtab_img) + FIND(need_symtab, ".strtab", strtab_sz, strtab_img) + FIND(need_stabs, ".stab", stab_sz, stab_img) + FIND(need_stabs, ".stabstr", stabstr_sz, stabstr_img) + FIND(need_dwarf2, ".debug_line", debug_line_sz, debug_line_img) + FIND(need_dwarf2, ".debug_info", debug_info_sz, debug_info_img) + FIND(need_dwarf2, ".debug_abbrev", debug_abbv_sz, debug_abbv_img) + FIND(need_dwarf2, ".debug_str", debug_str_sz, debug_str_img) + FIND(need_dwarf2, ".debug_ranges", debug_ranges_sz, + debug_ranges_img) + FIND(need_dwarf2, ".debug_loc", debug_loc_sz, debug_loc_img) + FIND(need_dwarf1, ".debug", dwarf1d_sz, dwarf1d_img) + FIND(need_dwarf1, ".line", dwarf1l_sz, dwarf1l_img) + +# undef FIND + } + } + } + + /* Check some sizes */ + vg_assert((dynsym_sz % sizeof(ElfXX_Sym)) == 0); + vg_assert((symtab_sz % sizeof(ElfXX_Sym)) == 0); + + /* Read symbols */ + { + void (*read_elf_symtab)(struct _DebugInfo*,UChar*, + ElfXX_Sym*,SizeT, + UChar*,SizeT, + Bool,UChar*); + Bool symtab_in_debug; +# if defined(VGP_ppc64_linux) + read_elf_symtab = read_elf_symtab__ppc64_linux; +# else + read_elf_symtab = read_elf_symtab__normal; +# endif + symtab_in_debug = (Addr)symtab_img >= dimage + && (Addr)symtab_img < dimage + n_dimage; + read_elf_symtab(di, "symbol table", + symtab_img, symtab_sz, + strtab_img, strtab_sz, + symtab_in_debug, opd_img); + + read_elf_symtab(di, "dynamic symbol table", + dynsym_img, dynsym_sz, + dynstr_img, dynstr_sz, + False, opd_img); + } + + /* Read .eh_frame (call-frame-info) if any */ + if (ehframe_img) { + vg_assert(ehframe_sz == di->ehframe_size); + ML_(read_callframe_info_dwarf3)( di, ehframe_img ); + } + + /* Read the stabs and/or dwarf2 debug information, if any. It + appears reading stabs stuff on amd64-linux doesn't work, so + we ignore it. 
*/ +# if !defined(VGP_amd64_linux) + if (stab_img && stabstr_img) { + ML_(read_debuginfo_stabs) ( di, stab_img, stab_sz, + stabstr_img, stabstr_sz ); + } +# endif + /* jrs 2006-01-01: icc-8.1 has been observed to generate + binaries without debug_str sections. Don't preclude + debuginfo reading for that reason, but, in + read_unitinfo_dwarf2, do check that debugstr is non-NULL + before using it. */ + if (debug_info_img && debug_abbv_img && debug_line_img + /* && debug_str_img */) { + + /* The old reader: line numbers and unwind info only */ + ML_(read_debuginfo_dwarf3) ( di, + debug_info_img, debug_info_sz, + debug_abbv_img, debug_abbv_sz, + debug_line_img, debug_line_sz, + debug_str_img, debug_str_sz ); + + /* The new reader: read the DIEs in .debug_info to acquire + information on variable types and locations. But only if + the tool asks for it, or the user requests it on the + command line. */ + if (VG_(needs).var_info /* the tool requires it */ + || VG_(clo_read_var_info) /* the user asked for it */) { + ML_(new_dwarf3_reader)( + di, debug_info_img, debug_info_sz, + debug_abbv_img, debug_abbv_sz, + debug_line_img, debug_line_sz, + debug_str_img, debug_str_sz, + debug_ranges_img, debug_ranges_sz, + debug_loc_img, debug_loc_sz + ); + } + } + if (dwarf1d_img && dwarf1l_img) { + ML_(read_debuginfo_dwarf1) ( di, dwarf1d_img, dwarf1d_sz, + dwarf1l_img, dwarf1l_sz ); + } + } + res = True; + + out: { + SysRes m_res; + + /* Last, but not least, heave the image(s) back overboard. 
*/ + if (dimage) { + m_res = VG_(am_munmap_valgrind) ( dimage, n_dimage ); + vg_assert(!m_res.isError); + } + m_res = VG_(am_munmap_valgrind) ( oimage, n_oimage ); + vg_assert(!m_res.isError); + return res; + } +} + + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/readpdb.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readpdb.c.svn-base new file mode 100644 index 0000000..18c479c --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readpdb.c.svn-base @@ -0,0 +1,2267 @@ + +/*--------------------------------------------------------------------*/ +/*--- Reading of syms & debug info from PDB-format files. ---*/ +/*--- readpdb.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + Spring 2008: + derived from readelf.c and valgrind-20031012-wine/vg_symtab2.c + derived from wine-1.0/tools/winedump/pdb.c and msc.c + + Copyright (C) 2000-2008 Julian Seward + jseward@acm.org + Copyright 2006 Eric Pouech (winedump/pdb.c and msc.c) + GNU Lesser General Public License version 2.1 or later applies. + Copyright (C) 2008 BitWagon Software LLC + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_vki.h" // VKI_PAGE_SIZE +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_options.h" // VG_(clo_verbosity) +#include "pub_core_xarray.h" // keeps priv_storage.h happy +#include "pub_core_redir.h" + +#include "priv_misc.h" /* dinfo_zalloc/free/strdup */ +#include "priv_d3basics.h" +#include "priv_storage.h" +#include "priv_readpdb.h" // self + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Biasing ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +/* JRS 2009-Apr-13: Mostly this PDB reader is straightforward. But + the biasing is incomprehensible, and I don't claim to understand it + at all. There are four places where biasing is required: + + - when reading symbol addresses (DEBUG_SnarfCodeView) + - when reading old-style line number tables (DEBUG_SnarfLinetab) + - when reading new-style line number tables (codeview_dump_linetab2) + - when reading FPO (stack-unwind) tables (pdb_dump) + + To complicate matters further, Wine supplies us, via the + VG_USERREQ__LOAD_PDB_DEBUGINFO client request that initiates PDB + reading, a value 'reloc' which, if you read 'virtual.c' in the Wine + sources, looks a lot like a text bias value. Yet the code below + ignores it. + + To make future experimentation with biasing easier, here are four + macros which give the bias to use in each of the four cases. Be + warned, they can and do refer to local vars in the relevant + functions. */ + +/* This is the biasing arrangement in John's original patch. 
I don't + see that is makes any sense for the FPO bias to be hardwired to + zero, but perhaps that's OK when the reloc value is also zero. + (iow, the FPO bias should actually be 'reloc' ?) */ +#define BIAS_FOR_SYMBOLS (di->rx_map_avma) +#define BIAS_FOR_LINETAB (di->rx_map_avma) +#define BIAS_FOR_LINETAB2 (di->text_bias) +#define BIAS_FOR_FPO 0 /* no, really */ + +/* This module leaks space; enable m_main's calling of + VG_(di_discard_ALL_debuginfo)() at shutdown and run with + --profile-heap=yes to see. The main culprit appears to be + di.readpe.pdr.1. I haven't bothered to chase it further. */ + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- PE/PDB definitions ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +typedef UInt DWORD; +typedef UShort WORD; +typedef UChar BYTE; + + +/* the following DOS and WINDOWS structures, defines and PE/PDB + * parsing code are copied or derived from the WINE + * project - http://www.winehq.com/ + */ + +/* + * File formats definitions + */ +#define OFFSET_OF(__c,__f) ((int)(((char*)&(((__c*)0)->__f))-((char*)0))) +#define WIN32_PATH_MAX 256 + +#pragma pack(2) +typedef struct _IMAGE_DOS_HEADER { + unsigned short e_magic; /* 00: MZ Header signature */ + unsigned short e_cblp; /* 02: Bytes on last page of file */ + unsigned short e_cp; /* 04: Pages in file */ + unsigned short e_crlc; /* 06: Relocations */ + unsigned short e_cparhdr; /* 08: Size of header in paragraphs */ + unsigned short e_minalloc; /* 0a: Minimum extra paragraphs needed */ + unsigned short e_maxalloc; /* 0c: Maximum extra paragraphs needed */ + unsigned short e_ss; /* 0e: Initial (relative) SS value */ + unsigned short e_sp; /* 10: Initial SP value */ + unsigned short e_csum; /* 12: Checksum */ + unsigned short e_ip; /* 14: Initial IP value */ + unsigned short e_cs; /* 16: Initial (relative) CS value */ + unsigned short e_lfarlc; /* 18: File address of relocation table */ + unsigned short 
e_ovno; /* 1a: Overlay number */ + unsigned short e_res[4]; /* 1c: Reserved words */ + unsigned short e_oemid; /* 24: OEM identifier (for e_oeminfo) */ + unsigned short e_oeminfo; /* 26: OEM information; e_oemid specific */ + unsigned short e_res2[10]; /* 28: Reserved words */ + unsigned long e_lfanew; /* 3c: Offset to extended header */ +} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER; + +#define IMAGE_DOS_SIGNATURE 0x5A4D /* MZ */ +#define IMAGE_OS2_SIGNATURE 0x454E /* NE */ +#define IMAGE_OS2_SIGNATURE_LE 0x454C /* LE */ +#define IMAGE_OS2_SIGNATURE_LX 0x584C /* LX */ +#define IMAGE_VXD_SIGNATURE 0x454C /* LE */ +#define IMAGE_NT_SIGNATURE 0x00004550 /* PE00 */ + +/* Subsystem Values */ + +#define IMAGE_SUBSYSTEM_UNKNOWN 0 +#define IMAGE_SUBSYSTEM_NATIVE 1 +#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 /* Windows GUI subsystem */ +#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 /* Windows character subsystem*/ +#define IMAGE_SUBSYSTEM_OS2_CUI 5 +#define IMAGE_SUBSYSTEM_POSIX_CUI 7 + +typedef struct _IMAGE_FILE_HEADER { + unsigned short Machine; + unsigned short NumberOfSections; + unsigned long TimeDateStamp; + unsigned long PointerToSymbolTable; + unsigned long NumberOfSymbols; + unsigned short SizeOfOptionalHeader; + unsigned short Characteristics; +} IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER; + +typedef struct _IMAGE_DATA_DIRECTORY { + unsigned long VirtualAddress; + unsigned long Size; +} IMAGE_DATA_DIRECTORY, *PIMAGE_DATA_DIRECTORY; + +#define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16 + +typedef struct _IMAGE_OPTIONAL_HEADER { + + /* Standard fields */ + + unsigned short Magic; /* 0x10b or 0x107 */ /* 0x00 */ + unsigned char MajorLinkerVersion; + unsigned char MinorLinkerVersion; + unsigned long SizeOfCode; + unsigned long SizeOfInitializedData; + unsigned long SizeOfUninitializedData; + unsigned long AddressOfEntryPoint; /* 0x10 */ + unsigned long BaseOfCode; + unsigned long BaseOfData; + + /* NT additional fields */ + + unsigned long ImageBase; + unsigned long SectionAlignment; /* 0x20 */ + 
unsigned long FileAlignment; + unsigned short MajorOperatingSystemVersion; + unsigned short MinorOperatingSystemVersion; + unsigned short MajorImageVersion; + unsigned short MinorImageVersion; + unsigned short MajorSubsystemVersion; /* 0x30 */ + unsigned short MinorSubsystemVersion; + unsigned long Win32VersionValue; + unsigned long SizeOfImage; + unsigned long SizeOfHeaders; + unsigned long CheckSum; /* 0x40 */ + unsigned short Subsystem; + unsigned short DllCharacteristics; + unsigned long SizeOfStackReserve; + unsigned long SizeOfStackCommit; + unsigned long SizeOfHeapReserve; /* 0x50 */ + unsigned long SizeOfHeapCommit; + unsigned long LoaderFlags; + unsigned long NumberOfRvaAndSizes; + IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; /* 0x60 */ + /* 0xE0 */ +} IMAGE_OPTIONAL_HEADER, *PIMAGE_OPTIONAL_HEADER; + +typedef struct _IMAGE_NT_HEADERS { + unsigned long Signature; /* "PE"\0\0 */ /* 0x00 */ + IMAGE_FILE_HEADER FileHeader; /* 0x04 */ + IMAGE_OPTIONAL_HEADER OptionalHeader; /* 0x18 */ +} IMAGE_NT_HEADERS, *PIMAGE_NT_HEADERS; + +#define IMAGE_SIZEOF_SHORT_NAME 8 + +typedef struct _IMAGE_SECTION_HEADER { + unsigned char Name[IMAGE_SIZEOF_SHORT_NAME]; + union { + unsigned long PhysicalAddress; + unsigned long VirtualSize; + } Misc; + unsigned long VirtualAddress; + unsigned long SizeOfRawData; + unsigned long PointerToRawData; + unsigned long PointerToRelocations; + unsigned long PointerToLinenumbers; + unsigned short NumberOfRelocations; + unsigned short NumberOfLinenumbers; + unsigned long Characteristics; +} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER; + +#define IMAGE_SIZEOF_SECTION_HEADER 40 + /* IMAGE_FIRST_SECTION(ntheader): address of the first section header, which is FileHeader.SizeOfOptionalHeader bytes past the start of OptionalHeader. The cast has to be a byte pointer so that the addition is done in bytes. NOTE(review): the offset annotations on the structs above and IMAGE_SIZEOF_SECTION_HEADER (40) only hold where unsigned long is 4 bytes; on an LP64 host these structs would not match that layout - confirm before relying on them there. */ +#define IMAGE_FIRST_SECTION(ntheader) \ + ((PIMAGE_SECTION_HEADER)((unsigned char*)&((PIMAGE_NT_HEADERS)(ntheader))->OptionalHeader + \ + ((PIMAGE_NT_HEADERS)(ntheader))->FileHeader.SizeOfOptionalHeader)) + +/* These defines are for the Characteristics bitfield. 
*/ +/* #define IMAGE_SCN_TYPE_REG 0x00000000 - Reserved */ +/* #define IMAGE_SCN_TYPE_DSECT 0x00000001 - Reserved */ +/* #define IMAGE_SCN_TYPE_NOLOAD 0x00000002 - Reserved */ +/* #define IMAGE_SCN_TYPE_GROUP 0x00000004 - Reserved */ +/* #define IMAGE_SCN_TYPE_NO_PAD 0x00000008 - Reserved */ +/* #define IMAGE_SCN_TYPE_COPY 0x00000010 - Reserved */ + +#define IMAGE_SCN_CNT_CODE 0x00000020 +#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 +#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 + +#define IMAGE_SCN_LNK_OTHER 0x00000100 +#define IMAGE_SCN_LNK_INFO 0x00000200 +/* #define IMAGE_SCN_TYPE_OVER 0x00000400 - Reserved */ +#define IMAGE_SCN_LNK_REMOVE 0x00000800 +#define IMAGE_SCN_LNK_COMDAT 0x00001000 + +/* 0x00002000 - Reserved */ +/* #define IMAGE_SCN_MEM_PROTECTED 0x00004000 - Obsolete */ +#define IMAGE_SCN_MEM_FARDATA 0x00008000 + +/* #define IMAGE_SCN_MEM_SYSHEAP 0x00010000 - Obsolete */ +#define IMAGE_SCN_MEM_PURGEABLE 0x00020000 +#define IMAGE_SCN_MEM_16BIT 0x00020000 +#define IMAGE_SCN_MEM_LOCKED 0x00040000 +#define IMAGE_SCN_MEM_PRELOAD 0x00080000 + +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 +#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 +#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 +#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 +#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 /* Default */ +#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 +#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 +/* 0x00800000 - Unused */ + +#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 + + +#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 +#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 +#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 +#define IMAGE_SCN_MEM_SHARED 0x10000000 +#define IMAGE_SCN_MEM_EXECUTE 0x20000000 +#define IMAGE_SCN_MEM_READ 0x40000000 +#define IMAGE_SCN_MEM_WRITE 0x80000000 + +#pragma pack() + +typedef struct _GUID /* 16 bytes */ +{ + unsigned int Data1; + unsigned short Data2; + unsigned short Data3; + unsigned char Data4[ 8 ]; +} GUID; + 
+/*======================================================================== + * Process PDB file. + */ + +#pragma pack(1) +typedef struct _PDB_FILE +{ + unsigned long size; + unsigned long unknown; + +} PDB_FILE, *PPDB_FILE; + +// A .pdb file begins with a variable-length one-line text string +// that ends in "\r\n\032". This is followed by a 4-byte "signature" +// ("DS\0\0" for newer files, "JG\0\0" for older files), then +// aligned up to a 4-byte boundary, then the struct below: +struct PDB_JG_HEADER +{ + //char ident[40]; // "Microsoft C/C++ program database 2.00\r\n\032" + //unsigned long signature; // "JG\0\0" + unsigned int blocksize; // 0x400 typical; also 0x800, 0x1000 + unsigned short freelist; + unsigned short total_alloc; + PDB_FILE toc; + unsigned short toc_block[ 1 ]; +}; + +struct PDB_DS_HEADER +{ + //char signature[32]; // "Microsoft C/C++ MSF 7.00\r\n\032DS\0\0" + unsigned int block_size; + unsigned int unknown1; + unsigned int num_pages; + unsigned int toc_size; + unsigned int unknown2; + unsigned int toc_page; +}; + +struct PDB_JG_TOC +{ + unsigned int nFiles; + PDB_FILE file[ 1 ]; + +}; + +struct PDB_DS_TOC +{ + unsigned int num_files; + unsigned int file_size[1]; +}; + +struct PDB_JG_ROOT +{ + unsigned int version; + unsigned int TimeDateStamp; + unsigned int age; + unsigned int cbNames; + char names[ 1 ]; +}; + +struct PDB_DS_ROOT +{ + unsigned int version; + unsigned int TimeDateStamp; + unsigned int age; + GUID guid; + unsigned int cbNames; + char names[1]; +}; + +typedef struct _PDB_TYPES_OLD +{ + unsigned long version; + unsigned short first_index; + unsigned short last_index; + unsigned long type_size; + unsigned short file; + unsigned short pad; + +} PDB_TYPES_OLD, *PPDB_TYPES_OLD; + +typedef struct _PDB_TYPES +{ + unsigned long version; + unsigned long type_offset; + unsigned long first_index; + unsigned long last_index; + unsigned long type_size; + unsigned short file; + unsigned short pad; + unsigned long hash_size; + unsigned long 
hash_base; + unsigned long hash_offset; + unsigned long hash_len; + unsigned long search_offset; + unsigned long search_len; + unsigned long unknown_offset; + unsigned long unknown_len; + +} PDB_TYPES, *PPDB_TYPES; + +typedef struct _PDB_SYMBOL_RANGE +{ + unsigned short segment; + unsigned short pad1; + unsigned long offset; + unsigned long size; + unsigned long characteristics; + unsigned short index; + unsigned short pad2; + +} PDB_SYMBOL_RANGE, *PPDB_SYMBOL_RANGE; + +typedef struct _PDB_SYMBOL_RANGE_EX +{ + unsigned short segment; + unsigned short pad1; + unsigned long offset; + unsigned long size; + unsigned long characteristics; + unsigned short index; + unsigned short pad2; + unsigned long timestamp; + unsigned long unknown; + +} PDB_SYMBOL_RANGE_EX, *PPDB_SYMBOL_RANGE_EX; + +typedef struct _PDB_SYMBOL_FILE +{ + unsigned long unknown1; + PDB_SYMBOL_RANGE range; + unsigned short flag; + unsigned short file; + unsigned long symbol_size; + unsigned long lineno_size; + unsigned long unknown2; + unsigned long nSrcFiles; + unsigned long attribute; + char filename[ 1 ]; + +} PDB_SYMBOL_FILE, *PPDB_SYMBOL_FILE; + +typedef struct _PDB_SYMBOL_FILE_EX +{ + unsigned long unknown1; + PDB_SYMBOL_RANGE_EX range; + unsigned short flag; + unsigned short file; + unsigned long symbol_size; + unsigned long lineno_size; + unsigned long unknown2; + unsigned long nSrcFiles; + unsigned long attribute; + unsigned long reserved[ 2 ]; + char filename[ 1 ]; + +} PDB_SYMBOL_FILE_EX, *PPDB_SYMBOL_FILE_EX; + +typedef struct _PDB_SYMBOL_SOURCE +{ + unsigned short nModules; + unsigned short nSrcFiles; + unsigned short table[ 1 ]; + +} PDB_SYMBOL_SOURCE, *PPDB_SYMBOL_SOURCE; + +typedef struct _PDB_SYMBOL_IMPORT +{ + unsigned long unknown1; + unsigned long unknown2; + unsigned long TimeDateStamp; + unsigned long nRequests; + char filename[ 1 ]; + +} PDB_SYMBOL_IMPORT, *PPDB_SYMBOL_IMPORT; + +typedef struct _PDB_SYMBOLS_OLD +{ + unsigned short hash1_file; + unsigned short hash2_file; + unsigned 
short gsym_file; + unsigned short pad; + unsigned long module_size; + unsigned long offset_size; + unsigned long hash_size; + unsigned long srcmodule_size; + +} PDB_SYMBOLS_OLD, *PPDB_SYMBOLS_OLD; + +typedef struct _PDB_SYMBOLS +{ + unsigned long signature; + unsigned long version; + unsigned long unknown; + unsigned long hash1_file; + unsigned long hash2_file; + unsigned long gsym_file; + unsigned long module_size; + unsigned long offset_size; + unsigned long hash_size; + unsigned long srcmodule_size; + unsigned long pdbimport_size; + unsigned long resvd[ 5 ]; + +} PDB_SYMBOLS, *PPDB_SYMBOLS; +#pragma pack() + +/*======================================================================== + * Process CodeView symbol information. + */ + +/* from wine-1.0/include/wine/mscvpdb.h */ + +struct p_string /* "Pascal string": prefixed by byte containing length */ +{ + unsigned char namelen; + char name[1]; +}; +/* The other kind of "char name[1]" is a "C++ string" terminated by '\0'. + * "Name mangling" to encode type information often exceeds 255 bytes. + * Instead of using a 2-byte explicit length, they save one byte of space + * but incur a strlen(). This is justified by other code that wants + * a "C string" [terminated by '\0'] anyway. 
+ */ + +union codeview_symbol +{ + struct + { + short int len; + short int id; + } generic; + + struct + { + short int len; + short int id; + unsigned int offset; + unsigned short segment; + unsigned short symtype; + struct p_string p_name; + } data_v1; + + struct + { + short int len; + short int id; + unsigned int symtype; + unsigned int offset; + unsigned short segment; + struct p_string p_name; + } data_v2; + + struct + { + short int len; + short int id; + unsigned int symtype; + unsigned int offset; + unsigned short segment; + char name[1]; /* terminated by '\0' */ + } data_v3; + + struct + { + short int len; + short int id; + unsigned int pparent; + unsigned int pend; + unsigned int next; + unsigned int offset; + unsigned short segment; + unsigned short thunk_len; + unsigned char thtype; + struct p_string p_name; + } thunk_v1; + + struct + { + short int len; + short int id; + unsigned int pparent; + unsigned int pend; + unsigned int next; + unsigned int offset; + unsigned short segment; + unsigned short thunk_len; + unsigned char thtype; + char name[1]; /* terminated by '\0' */ + } thunk_v3; + + struct + { + short int len; + short int id; + unsigned int pparent; + unsigned int pend; + unsigned int next; + unsigned int proc_len; + unsigned int debug_start; + unsigned int debug_end; + unsigned int offset; + unsigned short segment; + unsigned short proctype; + unsigned char flags; + struct p_string p_name; + } proc_v1; + + struct + { + short int len; + short int id; + unsigned int pparent; + unsigned int pend; + unsigned int next; + unsigned int proc_len; + unsigned int debug_start; + unsigned int debug_end; + unsigned int proctype; + unsigned int offset; + unsigned short segment; + unsigned char flags; + struct p_string p_name; + } proc_v2; + + struct + { + short int len; + short int id; + unsigned int pparent; + unsigned int pend; + unsigned int next; + unsigned int proc_len; + unsigned int debug_start; + unsigned int debug_end; + unsigned int proctype; + 
unsigned int offset; + unsigned short segment; + unsigned char flags; + char name[1]; /* terminated by '\0' */ + } proc_v3; + + struct + { + short int len; + short int id; + unsigned int symtype; + unsigned int offset; + unsigned short segment; + struct p_string p_name; + } public_v2; + + struct + { + short int len; + short int id; + unsigned int symtype; + unsigned int offset; + unsigned short segment; + char name[1]; /* terminated by '\0' */ + } public_v3; + + struct + { + short int len; /* Total length of this entry */ + short int id; /* Always S_BPREL_V1 */ + unsigned int offset; /* Stack offset relative to BP */ + unsigned short symtype; + struct p_string p_name; + } stack_v1; + + struct + { + short int len; /* Total length of this entry */ + short int id; /* Always S_BPREL_V2 */ + unsigned int offset; /* Stack offset relative to EBP */ + unsigned int symtype; + struct p_string p_name; + } stack_v2; + + struct + { + short int len; /* Total length of this entry */ + short int id; /* Always S_BPREL_V3 */ + int offset; /* Stack offset relative to BP */ + unsigned int symtype; + char name[1]; /* terminated by '\0' */ + } stack_v3; + + struct + { + short int len; /* Total length of this entry */ + short int id; /* Always S_BPREL_V3 */ + int offset; /* Stack offset relative to BP */ + unsigned int symtype; + unsigned short unknown; + char name[1]; /* terminated by '\0' */ + } stack_xxxx_v3; + + struct + { + short int len; /* Total length of this entry */ + short int id; /* Always S_REGISTER */ + unsigned short type; + unsigned short reg; + struct p_string p_name; + /* don't handle register tracking */ + } register_v1; + + struct + { + short int len; /* Total length of this entry */ + short int id; /* Always S_REGISTER_V2 */ + unsigned int type; /* check whether type & reg are correct */ + unsigned short reg; + struct p_string p_name; + /* don't handle register tracking */ + } register_v2; + + struct + { + short int len; /* Total length of this entry */ + short int 
id; /* Always S_REGISTER_V3 */ + unsigned int type; /* check whether type & reg are correct */ + unsigned short reg; + char name[1]; /* terminated by '\0' */ + /* don't handle register tracking */ + } register_v3; + + struct + { + short int len; + short int id; + unsigned int parent; + unsigned int end; + unsigned int length; + unsigned int offset; + unsigned short segment; + struct p_string p_name; + } block_v1; + + struct + { + short int len; + short int id; + unsigned int parent; + unsigned int end; + unsigned int length; + unsigned int offset; + unsigned short segment; + char name[1]; /* terminated by '\0' */ + } block_v3; + + struct + { + short int len; + short int id; + unsigned int offset; + unsigned short segment; + unsigned char flags; + struct p_string p_name; + } label_v1; + + struct + { + short int len; + short int id; + unsigned int offset; + unsigned short segment; + unsigned char flags; + char name[1]; /* terminated by '\0' */ + } label_v3; + + struct + { + short int len; + short int id; + unsigned short type; + unsigned short cvalue; /* numeric leaf */ +#if 0 + struct p_string p_name; +#endif + } constant_v1; + + struct + { + short int len; + short int id; + unsigned type; + unsigned short cvalue; /* numeric leaf */ +#if 0 + struct p_string p_name; +#endif + } constant_v2; + + struct + { + short int len; + short int id; + unsigned type; + unsigned short cvalue; +#if 0 + char name[1]; /* terminated by '\0' */ +#endif + } constant_v3; + + struct + { + short int len; + short int id; + unsigned short type; + struct p_string p_name; + } udt_v1; + + struct + { + short int len; + short int id; + unsigned type; + struct p_string p_name; + } udt_v2; + + struct + { + short int len; + short int id; + unsigned int type; + char name[1]; /* terminated by '\0' */ + } udt_v3; + + struct + { + short int len; + short int id; + char signature[4]; + struct p_string p_name; + } objname_v1; + + struct + { + short int len; + short int id; + unsigned int unknown; + struct 
p_string p_name; + } compiland_v1; + + struct + { + short int len; + short int id; + unsigned unknown1[4]; + unsigned short unknown2; + struct p_string p_name; + } compiland_v2; + + struct + { + short int len; + short int id; + unsigned int unknown; + char name[1]; /* terminated by '\0' */ + } compiland_v3; + + struct + { + short int len; + short int id; + unsigned int offset; + unsigned short segment; + } ssearch_v1; +}; + +#define S_COMPILAND_V1 0x0001 +#define S_REGISTER_V1 0x0002 +#define S_CONSTANT_V1 0x0003 +#define S_UDT_V1 0x0004 +#define S_SSEARCH_V1 0x0005 +#define S_END_V1 0x0006 +#define S_SKIP_V1 0x0007 +#define S_CVRESERVE_V1 0x0008 +#define S_OBJNAME_V1 0x0009 +#define S_ENDARG_V1 0x000a +#define S_COBOLUDT_V1 0x000b +#define S_MANYREG_V1 0x000c +#define S_RETURN_V1 0x000d +#define S_ENTRYTHIS_V1 0x000e + +#define S_BPREL_V1 0x0200 +#define S_LDATA_V1 0x0201 +#define S_GDATA_V1 0x0202 +#define S_PUB_V1 0x0203 +#define S_LPROC_V1 0x0204 +#define S_GPROC_V1 0x0205 +#define S_THUNK_V1 0x0206 +#define S_BLOCK_V1 0x0207 +#define S_WITH_V1 0x0208 +#define S_LABEL_V1 0x0209 +#define S_CEXMODEL_V1 0x020a +#define S_VFTPATH_V1 0x020b +#define S_REGREL_V1 0x020c +#define S_LTHREAD_V1 0x020d +#define S_GTHREAD_V1 0x020e + +#define S_PROCREF_V1 0x0400 +#define S_DATAREF_V1 0x0401 +#define S_ALIGN_V1 0x0402 +#define S_LPROCREF_V1 0x0403 + +#define S_REGISTER_V2 0x1001 /* Variants with new 32-bit type indices */ +#define S_CONSTANT_V2 0x1002 +#define S_UDT_V2 0x1003 +#define S_COBOLUDT_V2 0x1004 +#define S_MANYREG_V2 0x1005 +#define S_BPREL_V2 0x1006 +#define S_LDATA_V2 0x1007 +#define S_GDATA_V2 0x1008 +#define S_PUB_V2 0x1009 +#define S_LPROC_V2 0x100a +#define S_GPROC_V2 0x100b +#define S_VFTTABLE_V2 0x100c +#define S_REGREL_V2 0x100d +#define S_LTHREAD_V2 0x100e +#define S_GTHREAD_V2 0x100f +#if 0 +#define S_XXXXXXXXX_32 0x1012 /* seems linked to a function, content unknown */ +#endif +#define S_COMPILAND_V2 0x1013 + +#define S_COMPILAND_V3 0x1101 +#define 
S_THUNK_V3 0x1102 +#define S_BLOCK_V3 0x1103 +#define S_LABEL_V3 0x1105 +#define S_REGISTER_V3 0x1106 +#define S_CONSTANT_V3 0x1107 +#define S_UDT_V3 0x1108 +#define S_BPREL_V3 0x110B +#define S_LDATA_V3 0x110C +#define S_GDATA_V3 0x110D +#define S_PUB_V3 0x110E +#define S_LPROC_V3 0x110F +#define S_GPROC_V3 0x1110 +#define S_BPREL_XXXX_V3 0x1111 /* not really understood, but looks like bprel... */ +#define S_MSTOOL_V3 0x1116 /* compiler command line options and build information */ +#define S_PUB_FUNC1_V3 0x1125 /* didn't get the difference between the two */ +#define S_PUB_FUNC2_V3 0x1127 + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- pdb-reading: bits and pieces ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + /* Reader state for one .pdb image: read_file is the format-specific stream reader, pdbimage is the base address of the image, and u holds the header and TOC pointers for whichever of the two formats (jg or ds) is in use. */ +struct pdb_reader +{ + void* (*read_file)(struct pdb_reader*, unsigned, unsigned *); + // JRS 2009-Apr-8: .uu_n_pdbimage is never used. + UChar* pdbimage; // image address + SizeT uu_n_pdbimage; // size + union { + struct { + struct PDB_JG_HEADER* header; + struct PDB_JG_TOC* toc; + } jg; + struct { + struct PDB_DS_HEADER* header; + struct PDB_DS_TOC* toc; + } ds; + } u; +}; + + /* Gather a DS-format stream into one newly allocated contiguous buffer. block_list holds one 32-bit block number per block and size is the stream length in bytes; the buffer is sized to a whole number of blocks (size rounded up to block_size) and each block is copied from pdbimage. Returns NULL when size is 0. NOTE(review): block numbers are not checked against the image size, and nothing in this function releases the buffer, so ownership passes to the caller. */ +static void* pdb_ds_read( struct pdb_reader* pdb, + unsigned* block_list, + unsigned size ) +{ + unsigned blocksize, nBlocks; + UChar* buffer; + UInt i; + + if (!size) return NULL; + + blocksize = pdb->u.ds.header->block_size; + nBlocks = (size + blocksize - 1) / blocksize; + buffer = ML_(dinfo_zalloc)("di.readpe.pdr.1", nBlocks * blocksize); + for (i = 0; i < nBlocks; i++) + VG_(memcpy)( buffer + i * blocksize, + pdb->pdbimage + block_list[i] * blocksize, + blocksize ); + return buffer; +} + + /* JG-format counterpart of pdb_ds_read: the same gathering, but block numbers are 16-bit and the block size comes from the JG header. NOTE(review): size is a signed int mixed with the unsigned blocksize; presumably a negative size is never passed - confirm. */ +static void* pdb_jg_read( struct pdb_reader* pdb, + unsigned short* block_list, + int size ) +{ + unsigned blocksize, nBlocks; + UChar* buffer; + UInt i; + //VG_(printf)("pdb_read %p %p %d\n", pdb, block_list, size); + if ( !size ) return NULL; + + blocksize = pdb->u.jg.header->blocksize; + nBlocks = (size + 
blocksize-1) / blocksize; + buffer = ML_(dinfo_zalloc)("di.readpe.pjr.1", nBlocks * blocksize); + for ( i = 0; i < nBlocks; i++ ) + VG_(memcpy)( buffer + i*blocksize, + pdb->pdbimage + block_list[i]*blocksize, blocksize ); + return buffer; +} + + /* Check the text banner at the start of a .pdb image and locate the binary header behind it. Returns NULL if no '\032' byte is found or if the image does not begin with the pdbtxt prefix. Otherwise the 4 bytes following the '\032' are stored in *signature, and the result is the address just past them, rounded up to a multiple of 4 bytes from pdbimage. */ +static void* find_pdb_header( UChar* pdbimage, + unsigned* signature ) +{ + static char pdbtxt[]= "Microsoft C/C++"; + UChar* txteof = (UChar*)VG_(strchr)(pdbimage, '\032'); + if (! txteof) + return NULL; + if (0!=VG_(strncmp)(pdbimage, pdbtxt, -1+ sizeof(pdbtxt))) + return NULL; + + *signature = *(unsigned*)(1+ txteof); + return (void*)((~3& (3+ (4+ 1+ (txteof - pdbimage)))) + pdbimage); +} + + /* Read stream number file_number of a DS-format PDB into a newly allocated buffer via pdb_ds_read. Returns NULL if there is no TOC, if file_number is out of range, or if the recorded stream size is 0 or 0xFFFFFFFF. When plength is non-NULL it receives the stream size in bytes. The block-number lists start right after the file_size[] array, one list after another, so the loop steps over the lists of the preceding streams. */ +static void* pdb_ds_read_file( struct pdb_reader* reader, + unsigned file_number, + unsigned* plength ) +{ + unsigned i, *block_list; + if (!reader->u.ds.toc || file_number >= reader->u.ds.toc->num_files) + return NULL; + if (reader->u.ds.toc->file_size[file_number] == 0 + || reader->u.ds.toc->file_size[file_number] == 0xFFFFFFFF) + return NULL; + + block_list + = reader->u.ds.toc->file_size + reader->u.ds.toc->num_files; + for (i = 0; i < file_number; i++) + block_list += (reader->u.ds.toc->file_size[i] + + reader->u.ds.header->block_size - 1) + / + reader->u.ds.header->block_size; + if (plength) + *plength = reader->u.ds.toc->file_size[file_number]; + return pdb_ds_read( reader, block_list, + reader->u.ds.toc->file_size[file_number]); +} + + /* JG-format counterpart of pdb_ds_read_file: the 16-bit block lists start after the last PDB_FILE entry of the TOC. Returns NULL if there is no TOC or fileNr is out of range. NOTE(review): unlike the DS variant, a zero-sized stream still writes *plength before pdb_jg_read returns NULL, and pdb->u.jg.header is dereferenced before the TOC check. */ +static void* pdb_jg_read_file( struct pdb_reader* pdb, + unsigned fileNr, + unsigned *plength ) +{ + //VG_(printf)("pdb_read_file %p %d\n", pdb, fileNr); + unsigned blocksize = pdb->u.jg.header->blocksize; + struct PDB_JG_TOC* toc = pdb->u.jg.toc; + unsigned i; + unsigned short* block_list; + + if ( !toc || fileNr >= toc->nFiles ) + return NULL; + + block_list + = (unsigned short *) &toc->file[ toc->nFiles ]; + for ( i = 0; i < fileNr; i++ ) + block_list += (toc->file[i].size + blocksize-1) / blocksize; + + if (plength) + *plength = toc->file[fileNr].size; + return pdb_jg_read( pdb, 
block_list, toc->file[fileNr].size ); +} + + +static void pdb_ds_init( struct pdb_reader * reader, + UChar* pdbimage, + SizeT n_pdbimage ) +{ + reader->read_file = pdb_ds_read_file; + reader->pdbimage = pdbimage; + reader->uu_n_pdbimage = n_pdbimage; + reader->u.ds.toc + = pdb_ds_read( + reader, + (unsigned*)(reader->u.ds.header->block_size + * reader->u.ds.header->toc_page + + reader->pdbimage), + reader->u.ds.header->toc_size + ); +} + + +static void pdb_jg_init( struct pdb_reader* reader, + char* pdbimage, + unsigned n_pdbimage ) +{ + reader->read_file = pdb_jg_read_file; + reader->pdbimage = pdbimage; + reader->uu_n_pdbimage = n_pdbimage; + reader->u.jg.toc = pdb_jg_read(reader, + reader->u.jg.header->toc_block, + reader->u.jg.header->toc.size); +} + + + + +static +void pdb_check_root_version_and_timestamp( char* pdbname, + ULong pdbmtime, + unsigned version, + UInt TimeDateStamp ) +{ + switch ( version ) { + case 19950623: /* VC 4.0 */ + case 19950814: + case 19960307: /* VC 5.0 */ + case 19970604: /* VC 6.0 */ + case 20000404: /* VC 7.0 FIXME?? */ + break; + default: + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, + "Unknown .pdb root block version %d\n", version ); + } + if ( TimeDateStamp != pdbmtime ) { + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, + "Wrong time stamp of .PDB file %s (0x%08x, 0x%08llx)\n", + pdbname, TimeDateStamp, pdbmtime ); + } +} + + +static DWORD pdb_get_file_size( struct pdb_reader* reader, unsigned idx ) +{ + if (reader->read_file == pdb_jg_read_file) + return reader->u.jg.toc->file[idx].size; + else + return reader->u.ds.toc->file_size[idx]; +} + + +static void pdb_convert_types_header( PDB_TYPES *types, char* image ) +{ + VG_(memset)( types, 0, sizeof(PDB_TYPES) ); + if ( !image ) + return; + if ( *(unsigned long *)image < 19960000 ) { /* FIXME: correct version? 
*/ + /* Old version of the types record header */ + PDB_TYPES_OLD *old = (PDB_TYPES_OLD *)image; + types->version = old->version; + types->type_offset = sizeof(PDB_TYPES_OLD); + types->type_size = old->type_size; + types->first_index = old->first_index; + types->last_index = old->last_index; + types->file = old->file; + } else { + /* New version of the types record header */ + *types = *(PDB_TYPES *)image; + } +} + + +static void pdb_convert_symbols_header( PDB_SYMBOLS *symbols, + int *header_size, char* image ) +{ + VG_(memset)( symbols, 0, sizeof(PDB_SYMBOLS) ); + if ( !image ) + return; + if ( *(unsigned long *)image != 0xffffffff ) { + /* Old version of the symbols record header */ + PDB_SYMBOLS_OLD *old = (PDB_SYMBOLS_OLD *)image; + symbols->version = 0; + symbols->module_size = old->module_size; + symbols->offset_size = old->offset_size; + symbols->hash_size = old->hash_size; + symbols->srcmodule_size = old->srcmodule_size; + symbols->pdbimport_size = 0; + symbols->hash1_file = old->hash1_file; + symbols->hash2_file = old->hash2_file; + symbols->gsym_file = old->gsym_file; + *header_size = sizeof(PDB_SYMBOLS_OLD); + } else { + /* New version of the symbols record header */ + *symbols = *(PDB_SYMBOLS *)image; + *header_size = sizeof(PDB_SYMBOLS); + } +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Main stuff: reading of symbol addresses ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +static Bool debug = False; // JRS: fixme + +static ULong DEBUG_SnarfCodeView( + DebugInfo* di, + IMAGE_SECTION_HEADER* sectp, + void* root, /* FIXME: better name */ + Int offset, + Int size + ) +{ + Int i, length; + DiSym vsym; + UChar* nmstr; + Char symname[4096 /*WIN32_PATH_MAX*/]; + + Addr bias = BIAS_FOR_SYMBOLS; + ULong n_syms_read = 0; + + if (debug) + VG_(message)(Vg_UserMsg, "SnarfCodeView addr=%p offset=%d length=%d", + root, offset, size ); + + VG_(memset)(&vsym, 0, sizeof(vsym)); /* 
avoid holes */ + /* + * Loop over the different types of records and whenever we + * find something we are interested in, record it and move on. + */ + for ( i = offset; i < size; i += length ) + { + union codeview_symbol *sym = (union codeview_symbol *)((char *)root + i); + + length = sym->generic.len + 2; + + //VG_(printf)("id=%x len=%d\n", sym->generic.id, length); + switch ( sym->generic.id ) { + + default: + if (0) { + VG_(printf)("unknown id 0x%x len=0x%x at %p\n", + sym->generic.id, sym->generic.len, sym); + VG_(printf)(" %8x %8x %8x %8x\n", + ((int *)sym)[1],((int *)sym)[2], + ((int *)sym)[3],((int *)sym)[4]); + VG_(printf)(" %8x %8x %8x %8x\n", + ((int *)sym)[5],((int *)sym)[6], + ((int *)sym)[7],((int *)sym)[8]); + } + break; + /* + * Global and local data symbols. We don't associate these + * with any given source file. + */ + case S_GDATA_V1: + case S_LDATA_V1: + case S_PUB_V1: + VG_(memcpy)(symname, sym->data_v1.p_name.name, + sym->data_v1.p_name.namelen); + symname[sym->data_v1.p_name.namelen] = '\0'; + + if (debug) + VG_(message)(Vg_UserMsg, "Data %s", symname ); + + if (0 /*VG_(needs).data_syms*/) { + nmstr = ML_(addStr)(di, symname, sym->data_v1.p_name.namelen); + + vsym.addr = bias + sectp[sym->data_v1.segment-1].VirtualAddress + + sym->data_v1.offset; + vsym.name = nmstr; + vsym.size = sym->data_v1.p_name.namelen; + // FIXME: .namelen is sizeof(.data) including .name[] + vsym.isText = (sym->generic.id == S_PUB_V1); + ML_(addSym)( di, &vsym ); + n_syms_read++; + } + break; + case S_GDATA_V2: + case S_LDATA_V2: + case S_PUB_V2: { + Int const k = sym->data_v2.p_name.namelen; + VG_(memcpy)(symname, sym->data_v2.p_name.name, k); + symname[k] = '\0'; + + if (debug) + VG_(message)(Vg_UserMsg, + "S_GDATA_V2/S_LDATA_V2/S_PUB_V2 %s", symname ); + + if (sym->generic.id==S_PUB_V2 /*VG_(needs).data_syms*/) { + nmstr = ML_(addStr)(di, symname, k); + + vsym.addr = bias + sectp[sym->data_v2.segment-1].VirtualAddress + + sym->data_v2.offset; + vsym.name = nmstr; 
+ vsym.size = 4000; + // FIXME: data_v2.len is sizeof(.data), + // not size of function! + vsym.isText = !!(IMAGE_SCN_CNT_CODE + & sectp[sym->data_v2.segment-1].Characteristics); + ML_(addSym)( di, &vsym ); + n_syms_read++; + } + break; + } + case S_PUB_V3: + /* not completely sure of those two anyway */ + case S_PUB_FUNC1_V3: + case S_PUB_FUNC2_V3: { + Int k = sym->public_v3.len - (-1+ sizeof(sym->public_v3)); + if ((-1+ sizeof(symname)) < k) + k = -1+ sizeof(symname); + VG_(memcpy)(symname, sym->public_v3.name, k); + symname[k] = '\0'; + + if (debug) + VG_(message)(Vg_UserMsg, + "S_PUB_FUNC1_V3/S_PUB_FUNC2_V3/S_PUB_V3 %s", symname ); + + if (1 /*sym->generic.id==S_PUB_FUNC1_V3 + || sym->generic.id==S_PUB_FUNC2_V3*/) { + nmstr = ML_(addStr)(di, symname, k); + + vsym.addr = bias + sectp[sym->public_v3.segment-1].VirtualAddress + + sym->public_v3.offset; + vsym.name = nmstr; + vsym.size = 4000; + // FIXME: public_v3.len is not length of the + // .text of the function + vsym.isText = !!(IMAGE_SCN_CNT_CODE + & sectp[sym->data_v2.segment-1].Characteristics); + ML_(addSym)( di, &vsym ); + n_syms_read++; + } + break; + } + + /* + * Sort of like a global function, but it just points + * to a thunk, which is a stupid name for what amounts to + * a PLT slot in the normal jargon that everyone else uses. + */ + case S_THUNK_V3: + case S_THUNK_V1: + /* valgrind ignores PLTs */ /* JRS: it does? */ + break; + + /* + * Global and static functions. 
+ */ + case S_GPROC_V1: + case S_LPROC_V1: + VG_(memcpy)(symname, sym->proc_v1.p_name.name, + sym->proc_v1.p_name.namelen); + symname[sym->proc_v1.p_name.namelen] = '\0'; + nmstr = ML_(addStr)(di, symname, sym->proc_v1.p_name.namelen); + + vsym.addr = bias + sectp[sym->proc_v1.segment-1].VirtualAddress + + sym->proc_v1.offset; + vsym.name = nmstr; + vsym.size = sym->proc_v1.proc_len; + vsym.isText = True; + if (debug) + VG_(message)(Vg_UserMsg, "Adding function %s addr=%#lx length=%d", + symname, vsym.addr, vsym.size ); + ML_(addSym)( di, &vsym ); + n_syms_read++; + break; + + case S_GPROC_V2: + case S_LPROC_V2: + VG_(memcpy)(symname, sym->proc_v2.p_name.name, + sym->proc_v2.p_name.namelen); + symname[sym->proc_v2.p_name.namelen] = '\0'; + nmstr = ML_(addStr)(di, symname, sym->proc_v2.p_name.namelen); + + vsym.addr = bias + sectp[sym->proc_v2.segment-1].VirtualAddress + + sym->proc_v2.offset; + vsym.name = nmstr; + vsym.size = sym->proc_v2.proc_len; + vsym.isText = True; + if (debug) + VG_(message)(Vg_UserMsg, "Adding function %s addr=%#lx length=%d", + symname, vsym.addr, vsym.size ); + ML_(addSym)( di, &vsym ); + n_syms_read++; + break; + case S_LPROC_V3: + case S_GPROC_V3: { + if (debug) + VG_(message)(Vg_UserMsg, + "S_LPROC_V3/S_GPROC_V3 %s", sym->proc_v3.name ); + + if (1) { + nmstr = ML_(addStr)(di, sym->proc_v3.name, + VG_(strlen)(sym->proc_v3.name)); + + vsym.addr = bias + sectp[sym->proc_v3.segment-1].VirtualAddress + + sym->proc_v3.offset; + vsym.name = nmstr; + vsym.size = sym->proc_v3.proc_len; + vsym.isText = 1; + ML_(addSym)( di, &vsym ); + n_syms_read++; + } + break; + } + /* JRS: how is flow supposed to arrive at commented out code below? 
*/ + //if (nest_block) + //{ + // printf(">>> prev func '%s' still has nest_block %u count\n", + // curr_func, nest_block); + // nest_block = 0; + //} + //curr_func = strdup(sym->proc_v3.name); + /* EPP unsigned int pparent; */ + /* EPP unsigned int pend; */ + /* EPP unsigned int next; */ + /* EPP unsigned int debug_start; */ + /* EPP unsigned int debug_end; */ + /* EPP unsigned char flags; */ + // break; + + + /* + * Function parameters and stack variables. + */ + case S_BPREL_XXXX_V3: + case S_BPREL_V3: + case S_BPREL_V2: + case S_BPREL_V1: + /* ignored */ + break; + + case S_LABEL_V3: // FIXME + case S_LABEL_V1: + break; + + case S_SSEARCH_V1: + case S_ALIGN_V1: + case S_MSTOOL_V3: + case S_UDT_V3: + case S_UDT_V2: + case S_UDT_V1: + case S_CONSTANT_V3: + case S_CONSTANT_V1: + case S_OBJNAME_V1: + case S_END_V1: + case S_COMPILAND_V3: + case S_COMPILAND_V2: + case S_COMPILAND_V1: + case S_BLOCK_V3: + case S_BLOCK_V1: + case S_REGISTER_V3: + case S_REGISTER_V2: + case S_REGISTER_V1: + /* ignored */ + break; + + /* + * These are special, in that they are always followed by an + * additional length-prefixed string which is *not* included + * into the symbol length count. We need to skip it. 
+ */ + case S_PROCREF_V1: + case S_DATAREF_V1: + case S_LPROCREF_V1: { + unsigned char *name = (unsigned char *)sym + length; + length += (*name + 1 + 3) & ~3; + break; + } + } /* switch ( sym->generic.id ) */ + + } /* for ( i = offset; i < size; i += length ) */ + + return n_syms_read; +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Main stuff: reading of line number tables ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +union any_size +{ + char const *c; + short const *s; + int const *i; + unsigned int const *ui; +}; + +struct startend +{ + unsigned int start; + unsigned int end; +}; + +static ULong DEBUG_SnarfLinetab( + DebugInfo* di, + IMAGE_SECTION_HEADER* sectp, + Char* linetab, + Int size + ) +{ + //VG_(printf)("DEBUG_SnarfLinetab %p %p %p %d\n", di, sectp, linetab, size); + Int file_segcount; + Char filename[WIN32_PATH_MAX]; + UInt * filetab; + UChar * fn; + Int i; + Int k; + UInt * lt_ptr; + Int nfile; + Int nseg; + union any_size pnt; + union any_size pnt2; + struct startend * start; + Int this_seg; + + Addr bias = BIAS_FOR_LINETAB; + ULong n_lines_read = 0; + + /* + * Now get the important bits. + */ + pnt.c = linetab; + nfile = *pnt.s++; + nseg = *pnt.s++; + + filetab = (unsigned int *) pnt.c; + + /* + * Now count up the number of segments in the file. + */ + nseg = 0; + for (i = 0; i < nfile; i++) { + pnt2.c = linetab + filetab[i]; + nseg += *pnt2.s; + } + + this_seg = 0; + for (i = 0; i < nfile; i++) { + UChar *fnmstr; + UChar *dirstr; + + /* + * Get the pointer into the segment information. + */ + pnt2.c = linetab + filetab[i]; + file_segcount = *pnt2.s; + + pnt2.ui++; + lt_ptr = (unsigned int *) pnt2.c; + start = (struct startend *) (lt_ptr + file_segcount); + + /* + * Now snarf the filename for all of the segments for this file. 
+ */ + fn = (UChar*) (start + file_segcount); + /* fn now points at a Pascal-style string, that is, the first + byte is the length, and the remaining up to 255 (presumably) + are the contents. */ + vg_assert(WIN32_PATH_MAX >= 256); + VG_(memset)(filename, 0, sizeof(filename)); + VG_(memcpy)(filename, fn + 1, *fn); + vg_assert(filename[ sizeof(filename)-1 ] == 0); + filename[(Int)*fn] = 0; + fnmstr = VG_(strrchr)(filename, '\\'); + if (fnmstr == NULL) + fnmstr = filename; + else + ++fnmstr; + k = VG_(strlen)(fnmstr); + dirstr = ML_(addStr)(di, filename, *fn - k); + fnmstr = ML_(addStr)(di, fnmstr, k); + + for (k = 0; k < file_segcount; k++, this_seg++) { + Int linecount; + Int segno; + + pnt2.c = linetab + lt_ptr[k]; + + segno = *pnt2.s++; + linecount = *pnt2.s++; + + if ( linecount > 0 ) { + UInt j; + + if (debug) + VG_(message)(Vg_UserMsg, + "Adding %d lines for file %s segment %d addr=%#x end=%#x", + linecount, filename, segno, start[k].start, start[k].end ); + + for ( j = 0; j < linecount; j++ ) { + Addr startaddr = bias + sectp[segno-1].VirtualAddress + + pnt2.ui[j]; + Addr endaddr = bias + sectp[segno-1].VirtualAddress + + ((j < (linecount - 1)) + ? pnt2.ui[j+1] + : start[k].end); + if (debug) + VG_(message)(Vg_UserMsg, + "Adding line %d addr=%#lx end=%#lx", + ((unsigned short *)(pnt2.ui + linecount))[j], + startaddr, endaddr ); + ML_(addLineInfo)( + di, fnmstr, dirstr, startaddr, endaddr, + ((unsigned short *)(pnt2.ui + linecount))[j], j ); + n_lines_read++; + } + } + } + } + + return n_lines_read; +} + + + +/* there's a new line tab structure from MS Studio 2005 and after + * it's made of: + * DWORD 000000f4 + * DWORD lineblk_offset (counting bytes after this field) + * an array of codeview_linetab2_file structures + * an array (starting at <lineblk_offset>) of codeview_linetab2_block structures + */ + +struct codeview_linetab2_file +{ + DWORD offset; /* offset in string table for filename */ + WORD unk; /* always 0x0110... 
type of following + information ??? */ + BYTE md5[16]; /* MD5 signature of file (signature on + file's content or name ???) */ + WORD pad0; /* always 0 */ +}; + +struct codeview_linetab2_block +{ + DWORD header; /* 0x000000f2 */ + DWORD size_of_block; /* next block is at # bytes after this field */ + DWORD start; /* start address of function with line numbers */ + DWORD seg; /* segment of function with line numbers */ + DWORD size; /* size of function with line numbers */ + DWORD file_offset; /* offset for accessing corresponding + codeview_linetab2_file */ + DWORD nlines; /* number of lines in this block */ + DWORD size_lines; /* number of bytes following for line + number information */ + struct { + DWORD offset; /* offset (from <seg>:<start>) for line number */ + DWORD lineno; /* the line number (OR:ed with + 0x80000000 why ???) */ + } l[1]; /* actually array of <nlines> */ +}; + +static ULong codeview_dump_linetab2( + DebugInfo* di, + Char* linetab, + DWORD size, + Char* strimage, + DWORD strsize, + Char* pfx + ) +{ + DWORD offset; + unsigned i; + struct codeview_linetab2_block* lbh; + struct codeview_linetab2_file* fd; + //const Bool debug = False; + + Addr bias = BIAS_FOR_LINETAB2; + ULong n_line2s_read = 0; + + if (*(const DWORD*)linetab != 0x000000f4) + return 0; + offset = *((DWORD*)linetab + 1); + lbh = (struct codeview_linetab2_block*)(linetab + 8 + offset); + + while ((Char*)lbh < linetab + size) { + + HChar *filename, *dirname; + Addr svma_s, svma_e; + if (lbh->header != 0x000000f2) { + /* FIXME: should also check that whole lbh fits in linetab + size */ + if (debug) + VG_(printf)("%sblock end %x\n", pfx, lbh->header); + break; + } + if (debug) + VG_(printf)("%sblock from %04x:%08x-%08x (size %u) (%u lines)\n", + pfx, lbh->seg, lbh->start, lbh->start + lbh->size - 1, + lbh->size, lbh->nlines); + fd = (struct codeview_linetab2_file*)(linetab + 8 + lbh->file_offset); + if (debug) + VG_(printf)( + "%s md5=%02x%02x%02x%02x%02x%02x%02x%02x" + 
"%02x%02x%02x%02x%02x%02x%02x%02x\n", + pfx, fd->md5[ 0], fd->md5[ 1], fd->md5[ 2], fd->md5[ 3], + fd->md5[ 4], fd->md5[ 5], fd->md5[ 6], fd->md5[ 7], + fd->md5[ 8], fd->md5[ 9], fd->md5[10], fd->md5[11], + fd->md5[12], fd->md5[13], fd->md5[14], fd->md5[15] ); + /* FIXME: should check that string is within strimage + strsize */ + if (strimage) { + dirname = strimage + fd->offset; + filename = VG_(strrchr)(dirname, '\\'); + if (filename == NULL) { + filename = ML_(addStr)(di, dirname, -1); + dirname = NULL; + } else { + dirname = ML_(addStr)(di, dirname, VG_(strlen)(dirname) + - VG_(strlen)(filename)); + filename = ML_(addStr)(di, filename+1, -1); + } + } else { + filename = ML_(addStr)(di, "???", -1); + dirname = NULL; + } + + if (debug) + VG_(printf)("%s file=%s\n", pfx, filename); + + for (i = 0; i < lbh->nlines; i++) { + if (debug) + VG_(printf)("%s offset=%08x line=%d\n", + pfx, lbh->l[i].offset, lbh->l[i].lineno ^ 0x80000000); + } + + if (lbh->nlines > 1) { + for (i = 0; i < lbh->nlines-1; i++) { + svma_s = lbh->start + lbh->l[i].offset; + svma_e = lbh->start + lbh->l[i+1].offset-1; + if (debug) + VG_(printf)("%s line %d: %08lx to %08lx\n", + pfx, lbh->l[i].lineno ^ 0x80000000, svma_s, svma_e); + ML_(addLineInfo)( di, filename, dirname, + bias + svma_s, + bias + svma_e + 1, + lbh->l[i].lineno ^ 0x80000000, 0 ); + n_line2s_read++; + } + svma_s = lbh->start + lbh->l[ lbh->nlines-1].offset; + svma_e = lbh->start + lbh->size - 1; + if (debug) + VG_(printf)("%s line %d: %08lx to %08lx\n", + pfx, lbh->l[ lbh->nlines-1 ].lineno ^ 0x80000000, + svma_s, svma_e); + ML_(addLineInfo)( di, filename, dirname, + bias + svma_s, + bias + svma_e + 1, + lbh->l[lbh->nlines-1].lineno ^ 0x80000000, 0 ); + n_line2s_read++; + } + + lbh = (struct codeview_linetab2_block*) + ((char*)lbh + 8 + lbh->size_of_block); + } + return n_line2s_read; +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- Main stuff: pdb_dump ---*/ +/*--- ---*/ 
+/*------------------------------------------------------------*/ + +/* JRS fixme: compare with version in current Wine sources */ +static void pdb_dump( struct pdb_reader* pdb, + DebugInfo* di, + Addr pe_avma, + Int reloc, + IMAGE_SECTION_HEADER* sectp_avma ) +{ + Int header_size; + + PDB_TYPES types; + PDB_SYMBOLS symbols; + unsigned len_modimage; + char *modimage; + char *file; + + Addr bias_for_fpo = BIAS_FOR_FPO; + + ULong n_fpos_read = 0, n_syms_read = 0, + n_lines_read = 0, n_line2s_read = 0; + + // FIXME: symbols for bare indices 1,2,3,5 in .pdb file + + char* types_image = pdb->read_file( pdb, 2, 0 ); + char* symbols_image = pdb->read_file( pdb, 3, 0 ); + + /* establish filesimage and filessize. These are only needed for + reading linetab2 tables, as far as I can deduce from the Wine + sources. */ + char* filesimage = pdb->read_file( pdb, 12, 0); /* FIXME: really fixed ??? */ + UInt filessize = 0; + if (filesimage) { + if (*(const DWORD*)filesimage == 0xeffeeffe) { + filessize = *(const DWORD*)(filesimage + 8); + } else { + if (0) + VG_(printf)("wrong header %x expecting 0xeffeeffe\n", + *(const DWORD*)filesimage); + ML_(dinfo_free)( (void*)filesimage); + filesimage = NULL; + } + } + + if (VG_(clo_verbosity) > 0) { + VG_(message)(Vg_DebugMsg, + "PDB_READER:"); + VG_(message)(Vg_DebugMsg, + " BIAS_FOR_SYMBOLS = %#08lx %s", + (PtrdiffT)BIAS_FOR_SYMBOLS, VG_STRINGIFY(BIAS_FOR_SYMBOLS)); + VG_(message)(Vg_DebugMsg, + " BIAS_FOR_LINETAB = %#08lx %s", + (PtrdiffT)BIAS_FOR_LINETAB, VG_STRINGIFY(BIAS_FOR_LINETAB)); + VG_(message)(Vg_DebugMsg, + " BIAS_FOR_LINETAB2 = %#08lx %s", + (PtrdiffT)BIAS_FOR_LINETAB2, VG_STRINGIFY(BIAS_FOR_LINETAB2)); + VG_(message)(Vg_DebugMsg, + " BIAS_FOR_FPO = %#08lx %s", + (PtrdiffT)BIAS_FOR_FPO, VG_STRINGIFY(BIAS_FOR_FPO)); + VG_(message)(Vg_DebugMsg, + " RELOC = %#08lx", + (PtrdiffT)reloc); + } + + /* Since we just use the FPO data without reformatting, at least + do a basic sanity check on the struct layout. 
*/ + vg_assert(sizeof(FPO_DATA) == 16); + if (di->text_present) { + /* only load FPO if there's text present (otherwise it's + meaningless?) */ + unsigned sz = 0; + di->fpo = pdb->read_file( pdb, 5, &sz ); + di->fpo_size = sz; + } else { + vg_assert(di->fpo == NULL); + vg_assert(di->fpo_size == 0); + } + + if (di->fpo) { + Word i; + Addr min_svma = ~(Addr)0; + Addr max_svma = (Addr)0; + vg_assert(sizeof(di->fpo[0]) == 16); + di->fpo_size /= sizeof(di->fpo[0]); + + /* Sanity-check the table, and find the min and max avmas. */ + for (i = 0; i < di->fpo_size; i++) { + /* If any of the following assertions fail, we'll need to add + an extra pass to tidy up the FPO info -- make them be in + order and non-overlapping, since in-orderness and + non-overlappingness are required for safe use of + ML_(search_one_fpotab). */ + vg_assert(di->fpo[i].cbProcSize > 0); + if (i > 0) { + Bool ok; + Bool dup + = di->fpo[i-1].ulOffStart == di->fpo[i].ulOffStart + && di->fpo[i-1].cbProcSize == di->fpo[i].cbProcSize; + /* tolerate exact duplicates -- I think they are harmless + w.r.t. termination properties of the binary search in + ML_(search_one_fpotab). */ + if (dup) + continue; + + ok = di->fpo[i-1].ulOffStart + di->fpo[i-1].cbProcSize + <= di->fpo[i].ulOffStart; + if (1 && !ok) + VG_(printf)("%#x +%d then %#x +%d\n", + di->fpo[i-1].ulOffStart, di->fpo[i-1].cbProcSize, + di->fpo[i-0].ulOffStart, di->fpo[i-0].cbProcSize ); + vg_assert(ok); + } + /* Update min/max limits as we go along. */ + if (di->fpo[i].ulOffStart < min_svma) + min_svma = di->fpo[i].ulOffStart; + if (di->fpo[i].ulOffStart + di->fpo[i].cbProcSize - 1 > max_svma) + max_svma = di->fpo[i].ulOffStart + di->fpo[i].cbProcSize - 1; + } + /* Now bias the table. This can't be done in the same pass as + the sanity check, hence a second loop. 
*/ + for (i = 0; i < di->fpo_size; i++) { + di->fpo[i].ulOffStart += bias_for_fpo; + } + + /* And record min/max */ + vg_assert(min_svma <= max_svma); /* should always hold */ + + di->fpo_minavma = min_svma + bias_for_fpo; + di->fpo_maxavma = max_svma + bias_for_fpo; + + /* biasing shouldn't cause wraparound (?!) */ + vg_assert(di->fpo_minavma <= di->fpo_maxavma); + + if (0) { + VG_(printf)("XXXXXXXXX min/max svma %#lx %#lx\n", + min_svma, max_svma); + VG_(printf)("XXXXXXXXX min/max avma %#lx %#lx\n", + di->fpo_minavma, di->fpo_maxavma); + } + + n_fpos_read += (ULong)di->fpo_size; + } + + pdb_convert_types_header( &types, types_image ); + switch ( types.version ) { + case 19950410: /* VC 4.0 */ + case 19951122: + case 19961031: /* VC 5.0 / 6.0 */ + case 20040203: /* VC 7.0 FIXME?? */ + break; + default: + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Unknown .pdb type info version %ld\n", + types.version ); + } + + header_size = 0; + pdb_convert_symbols_header( &symbols, &header_size, symbols_image ); + switch ( symbols.version ) { + case 0: /* VC 4.0 */ + case 19960307: /* VC 5.0 */ + case 19970606: /* VC 6.0 */ + case 19990903: /* VC 7.0 FIXME?? 
*/ + break; + default: + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Unknown .pdb symbol info version %ld\n", + symbols.version ); + } + + /* + * Read global symbol table + */ + modimage = pdb->read_file( pdb, symbols.gsym_file, &len_modimage ); + if (modimage) { + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Reading global symbols\n" ); + DEBUG_SnarfCodeView( di, sectp_avma, modimage, 0, len_modimage ); + ML_(dinfo_free)( (void*)modimage ); + } + + /* + * Read per-module symbol / linenumber tables + */ + file = symbols_image + header_size; + while ( file - symbols_image < header_size + symbols.module_size ) { + int file_nr, file_index, symbol_size, lineno_size; + char *file_name; + + if ( symbols.version < 19970000 ) { + PDB_SYMBOL_FILE *sym_file = (PDB_SYMBOL_FILE *) file; + file_nr = sym_file->file; + file_name = sym_file->filename; + file_index = sym_file->range.index; + symbol_size = sym_file->symbol_size; + lineno_size = sym_file->lineno_size; + } else { + PDB_SYMBOL_FILE_EX *sym_file = (PDB_SYMBOL_FILE_EX *) file; + file_nr = sym_file->file; + file_name = sym_file->filename; + file_index = sym_file->range.index; + symbol_size = sym_file->symbol_size; + lineno_size = sym_file->lineno_size; + } + + modimage = pdb->read_file( pdb, file_nr, 0 ); + if (modimage) { + Int total_size; + if (0) VG_(printf)("lineno_size %d symbol_size %d\n", + lineno_size, symbol_size ); + + total_size = pdb_get_file_size(pdb, file_nr); + + if (symbol_size) { + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Reading symbols for %s", file_name ); + n_syms_read + += DEBUG_SnarfCodeView( di, sectp_avma, modimage, + sizeof(unsigned long), + symbol_size ); + } + + if (lineno_size) { + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Reading lines for %s", file_name ); + n_lines_read + += DEBUG_SnarfLinetab( di, sectp_avma, + modimage + symbol_size, lineno_size ); + } + + /* anyway, lineno_size doesn't see to really be the size of + * the line number 
information, and it's not clear yet when + * to call for linetab2... + */ + n_line2s_read + += codeview_dump_linetab2( + di, (char*)modimage + symbol_size + lineno_size, + total_size - (symbol_size + lineno_size), + /* if filesimage is NULL, pass that directly onwards + to codeview_dump_linetab2, so it knows not to + poke around in there. */ + filesimage ? filesimage + 12 : NULL, + filessize, " " + ); + + ML_(dinfo_free)( (void*)modimage ); + } + + file_name += VG_(strlen)(file_name) + 1; + file = (char *)( + (unsigned long)(file_name + + VG_(strlen)(file_name) + 1 + 3) & ~3 ); + } + + /* + * Cleanup + */ + if ( symbols_image ) ML_(dinfo_free)( symbols_image ); + if ( types_image ) ML_(dinfo_free)( types_image ); + if ( pdb->u.jg.toc ) ML_(dinfo_free)( pdb->u.jg.toc ); + + if (VG_(clo_verbosity) > 0) { + VG_(message)(Vg_DebugMsg," # symbols read = %llu", n_syms_read ); + VG_(message)(Vg_DebugMsg," # lines read = %llu", n_lines_read ); + VG_(message)(Vg_DebugMsg," # line2s read = %llu", n_line2s_read ); + VG_(message)(Vg_DebugMsg," # fpos read = %llu", n_fpos_read ); + } +} + + +/*------------------------------------------------------------*/ +/*--- ---*/ +/*--- TOP LEVEL for PDB reading ---*/ +/*--- ---*/ +/*------------------------------------------------------------*/ + +Bool ML_(read_pdb_debug_info)( + DebugInfo* di, + Addr obj_avma, + PtrdiffT unknown_purpose__reloc, + void* pdbimage, + SizeT n_pdbimage, + Char* pdbname, + ULong pdbmtime + ) +{ + Char* pe_seg_avma; + Int i; + Addr mapped_avma, mapped_end_avma; + unsigned signature; + void* hdr; + struct pdb_reader reader; + IMAGE_DOS_HEADER* dos_avma; + IMAGE_NT_HEADERS* ntheaders_avma; + IMAGE_SECTION_HEADER* sectp_avma; + IMAGE_SECTION_HEADER* pe_sechdr_avma; + + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, "Processing PDB file %s ", pdbname ); + + dos_avma = (IMAGE_DOS_HEADER *)obj_avma; + if (dos_avma->e_magic != IMAGE_DOS_SIGNATURE) + return False; + + ntheaders_avma + = (IMAGE_NT_HEADERS 
*)((Char*)dos_avma + dos_avma->e_lfanew); + if (ntheaders_avma->Signature != IMAGE_NT_SIGNATURE) + return False; + + sectp_avma + = (IMAGE_SECTION_HEADER *)( + (Char*)ntheaders_avma + + OFFSET_OF(IMAGE_NT_HEADERS, OptionalHeader) + + ntheaders_avma->FileHeader.SizeOfOptionalHeader + ); + + /* JRS: this seems like something of a hack. */ + di->soname = ML_(dinfo_strdup)("di.readpdb.rpdi.1", pdbname); + + /* someone (ie WINE) is loading a Windows PE format object. we + need to use its details to determine which area of memory is + executable... */ + pe_seg_avma + = (Char*)ntheaders_avma + + OFFSET_OF(IMAGE_NT_HEADERS, OptionalHeader) + + ntheaders_avma->FileHeader.SizeOfOptionalHeader; + + di->rx_map_avma = (Addr)obj_avma; + + /* Iterate over PE(?) headers. Try to establish the text_bias, + that's all we really care about. */ + for ( i = 0; + i < ntheaders_avma->FileHeader.NumberOfSections; + i++, pe_seg_avma += sizeof(IMAGE_SECTION_HEADER) ) { + pe_sechdr_avma = (IMAGE_SECTION_HEADER *)pe_seg_avma; + + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_UserMsg, + " Scanning PE section %s at avma %p svma %#lx", + pe_sechdr_avma->Name, pe_seg_avma, + pe_sechdr_avma->VirtualAddress); + + if (pe_sechdr_avma->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) + continue; + + mapped_avma = (Addr)obj_avma + pe_sechdr_avma->VirtualAddress; + mapped_end_avma = mapped_avma + pe_sechdr_avma->Misc.VirtualSize; + if (VG_(clo_verbosity) > 1) + VG_(message)(Vg_DebugMsg, + " ::: mapped_avma is %#lx", mapped_avma); + + if (pe_sechdr_avma->Characteristics & IMAGE_SCN_CNT_CODE) { + di->have_rx_map = True; + if (di->rx_map_avma == 0) { + di->rx_map_avma = mapped_avma; + } + if (di->rx_map_size==0) { + di->rx_map_foff = pe_sechdr_avma->PointerToRawData; + } + di->text_present = True; + if (di->text_avma==0) { + di->text_avma = mapped_avma; + } + di->text_size += pe_sechdr_avma->Misc.VirtualSize; + di->rx_map_size += pe_sechdr_avma->Misc.VirtualSize; + } + else if (pe_sechdr_avma->Characteristics + 
& IMAGE_SCN_CNT_INITIALIZED_DATA) { + di->have_rw_map = True; + if (di->rw_map_avma == 0) { + di->rw_map_avma = mapped_avma; + } + if (di->rw_map_size==0) { + di->rw_map_foff = pe_sechdr_avma->PointerToRawData; + } + di->data_present = True; + if (di->data_avma==0) { + di->data_avma = mapped_avma; + } + di->rw_map_size += pe_sechdr_avma->Misc.VirtualSize; + di->data_size += pe_sechdr_avma->Misc.VirtualSize; + } + else if (pe_sechdr_avma->Characteristics + & IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + di->bss_present = True; + di->bss_avma = mapped_avma; + di->bss_size = pe_sechdr_avma->Misc.VirtualSize; + } + + mapped_avma = VG_PGROUNDDN(mapped_avma); + mapped_end_avma = VG_PGROUNDUP(mapped_end_avma); + + /* Urr. These tests are bogus; ->rx_map_avma is not necessarily + the start of the text section. */ + if ((1 /*VG_(needs).data_syms*/ + || (pe_sechdr_avma->Characteristics & IMAGE_SCN_CNT_CODE)) + && mapped_avma >= di->rx_map_avma + && mapped_avma <= (di->rx_map_avma+di->text_size) + && mapped_end_avma > (di->rx_map_avma+di->text_size)) { + UInt newsz = mapped_end_avma - di->rx_map_avma; + if (newsz > di->text_size) { + /* extending the mapping is always needed for PE files + under WINE */ + di->text_size = newsz; + di->rx_map_size = newsz; + } + } + } + + if (di->have_rx_map && di->have_rw_map && !di->have_dinfo) { + vg_assert(di->filename); + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ start PE OBJECT with PDB INFO " + "---------------------\n"); + TRACE_SYMTAB("------ name = %s\n", di->filename); + TRACE_SYMTAB("\n"); + } + + if (di->text_present) { + di->text_bias = di->text_avma - di->text_svma; + } else { + di->text_bias = 0; + } + + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_DebugMsg, + "rx_map: avma %#lx size %7lu foff %llu\n", + di->rx_map_avma, di->rx_map_size, (Off64T)di->rx_map_foff); + VG_(message)(Vg_DebugMsg, + "rw_map: avma %#lx size %7lu foff %llu\n", + di->rw_map_avma, di->rw_map_size, (Off64T)di->rw_map_foff); + + VG_(message)(Vg_DebugMsg, + " 
text: avma %#lx svma %#lx size %7lu bias %#lx\n", + di->text_avma, di->text_svma, di->text_size, di->text_bias); + } + + /* + * Read in TOC and well-known files + */ + signature = 0; + hdr = find_pdb_header( pdbimage, &signature ); + if (0==hdr) + return False; /* JRS: significance? no pdb header? */ + + VG_(memset)(&reader, 0, sizeof(reader)); + reader.u.jg.header = hdr; + + if (0==VG_(strncmp)((char const *)&signature, "DS\0\0", 4)) { + struct PDB_DS_ROOT* root; + pdb_ds_init( &reader, pdbimage, n_pdbimage ); + root = reader.read_file( &reader, 1, 0 ); + if (root) { + pdb_check_root_version_and_timestamp( + pdbname, pdbmtime, root->version, root->TimeDateStamp ); + ML_(dinfo_free)( root ); + } + pdb_dump( &reader, di, obj_avma, unknown_purpose__reloc, sectp_avma ); + } + else + if (0==VG_(strncmp)((char const *)&signature, "JG\0\0", 4)) { + struct PDB_JG_ROOT* root; + pdb_jg_init( &reader, pdbimage, n_pdbimage ); + root = reader.read_file( &reader, 1, 0 ); + if (root) { + pdb_check_root_version_and_timestamp( + pdbname, pdbmtime, root->version, root->TimeDateStamp); + ML_(dinfo_free)( root ); + } + pdb_dump( &reader, di, obj_avma, unknown_purpose__reloc, sectp_avma ); + } + + if (1) { + TRACE_SYMTAB("\n------ Canonicalising the " + "acquired info ------\n"); + /* prepare read data for use */ + ML_(canonicaliseTables)( di ); + /* notify m_redir about it */ + TRACE_SYMTAB("\n------ Notifying m_redir ------\n"); + VG_(redir_notify_new_DebugInfo)( di ); + /* Note that we succeeded */ + di->have_dinfo = True; + } else { + TRACE_SYMTAB("\n------ PE with PDB reading failed ------\n"); + /* Something went wrong (eg. bad ELF file). Should we delete + this DebugInfo? No - it contains info on the rw/rx + mappings, at least. 
*/ + } + + TRACE_SYMTAB("\n"); + TRACE_SYMTAB("------ name = %s\n", di->filename); + TRACE_SYMTAB("------ end PE OBJECT with PDB INFO " + "--------------------\n"); + TRACE_SYMTAB("\n"); + + return True; +} + + +/*--------------------------------------------------------------------*/ +/*--- end readpdb.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/readstabs.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readstabs.c.svn-base new file mode 100644 index 0000000..8a3c6e1 --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readstabs.c.svn-base @@ -0,0 +1,380 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read stabs debug info. readstabs.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +/* + Stabs reader greatly improved by Nick Nethercote, Apr 02. + This module was also extensively hacked on by Jeremy Fitzhardinge + and Tom Hughes. 
+*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_xarray.h" +#include "priv_misc.h" /* dinfo_zalloc/free/strdup */ +#include "priv_tytypes.h" +#include "priv_d3basics.h" +#include "priv_storage.h" +#include "priv_readstabs.h" /* self */ + +/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ +#include <a.out.h> /* stabs defns */ +/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ + +/*------------------------------------------------------------*/ +/*--- Read STABS format debug info. ---*/ +/*------------------------------------------------------------*/ + +/* Stabs entry types, from: + * The "stabs" debug format + * Menapace, Kingdon and MacKenzie + * Cygnus Support + */ +typedef enum { N_UNDEF = 0, /* undefined symbol, new stringtab */ + N_GSYM = 32, /* Global symbol */ + N_FUN = 36, /* Function start or end */ + N_STSYM = 38, /* Data segment file-scope variable */ + N_LCSYM = 40, /* BSS segment file-scope variable */ + N_RSYM = 64, /* Register variable */ + N_SLINE = 68, /* Source line number */ + N_SO = 100, /* Source file path and name */ + N_LSYM = 128, /* Stack variable or type */ + N_BINCL = 130, /* Beginning of an include file */ + N_SOL = 132, /* Include file name */ + N_PSYM = 160, /* Function parameter */ + N_EINCL = 162, /* End of an include file */ + N_LBRAC = 192, /* Start of lexical block */ + N_EXCL = 194, /* Placeholder for an include file */ + N_RBRAC = 224 /* End of lexical block */ + } stab_types; + + +/* Read stabs-format debug info. This is all rather horrible because + stabs is a underspecified, kludgy hack. 
+*/ +void ML_(read_debuginfo_stabs) ( DebugInfo* di, + UChar* stabC, Int stab_sz, + UChar* stabstr, Int stabstr_sz ) +{ + const Bool debug = False; + const Bool contdebug = False; + Int i; + Int n_stab_entries; + struct nlist* stab = (struct nlist*)stabC; + UChar *next_stabstr = NULL; + /* state for various things */ + struct { + Addr start; /* start address */ + Addr end; /* end address */ + Int line; /* first line */ + } func = { 0, 0, -1 }; + struct { + Char *name; + Bool same; + } file = { NULL, True }; + struct { + Int prev; /* prev line */ + Int no; /* current line */ + Int ovf; /* line wrap */ + Addr addr; /* start of this line */ + Bool first; /* first line in function */ + } line = { 0, 0, 0, 0, False }; + + /* Ok. It all looks plausible. Go on and read debug data. + stab kinds: 100 N_SO a source file name + 68 N_SLINE a source line number + 36 N_FUN start of a function + + In this loop, we maintain a current file name, updated as + N_SO/N_SOLs appear, and a current function base address, + updated as N_FUNs appear. Based on that, address ranges for + N_SLINEs are calculated, and stuffed into the line info table. + + Finding the instruction address range covered by an N_SLINE is + complicated; see the N_SLINE case below. 
+    */
+   file.name = ML_(addStr)(di,"???", -1);
+
+   /* Number of whole nlist records in the stabs section. */
+   n_stab_entries = stab_sz/(int)sizeof(struct nlist);
+
+   for (i = 0; i < n_stab_entries; i++) {
+      const struct nlist *st = &stab[i];
+      Char *string;
+
+      if (debug && 1) {
+         VG_(printf) ( "%2d type=%d othr=%d desc=%d value=0x%x strx=%d %s\n", i,
+                       st->n_type, st->n_other, st->n_desc,
+                       (int)st->n_value,
+                       (int)st->n_un.n_strx,
+                       stabstr + st->n_un.n_strx );
+      }
+
+      /* handle continued string stabs */
+      {
+         Int   qbuflen = 0;
+         Int   qidx = 0;
+         Char* qbuf = NULL;
+         Int   qlen;
+         Bool  qcontinuing = False;
+         UInt  qstringidx;
+
+         qstringidx = st->n_un.n_strx;
+         string = stabstr + qstringidx;
+         qlen = VG_(strlen)(string);
+
+         while (string
+                && qlen > 0
+                && (qcontinuing || string[qlen-1] == '\\')) {
+            /* Gak, we have a continuation.  Skip forward through
+               subsequent stabs to gather all the parts of the
+               continuation.  Increment i, but keep st pointing at
+               current stab. */
+
+            qcontinuing = string[qlen-1] == '\\';
+
+            /* Remove the trailing backslash(es).  The length must be
+               tested BEFORE the character is read: a piece made up
+               only of backslashes drives qlen to zero, and indexing
+               string[qlen-1] at that point would read the byte in
+               front of the string. */
+            while (qlen > 0 && string[qlen-1] == '\\')
+               qlen--;
+
+            /* Same guard for the debug print: there is no "last
+               character" to show once qlen has reached zero. */
+            if (contdebug)
+               VG_(printf)("found extension string: \"%s\" "
+                           "len=%d(%c) idx=%d buflen=%d\n",
+                           string, qlen,
+                           qlen > 0 ? string[qlen-1] : '?',
+                           qidx, qbuflen);
+
+            /* XXX this is silly.  The si->strtab should have a way of
+               appending to the last added string... 
*/ + if ((qidx + qlen) >= qbuflen) { + Char *n; + + if (qbuflen == 0) + qbuflen = 16; + while ((qidx + qlen) >= qbuflen) + qbuflen *= 2; + n = ML_(dinfo_zalloc)("di.readstabs.rds.1", qbuflen); + VG_(memcpy)(n, qbuf, qidx); + + if (qbuf != NULL) + ML_(dinfo_free)(qbuf); + qbuf = n; + } + + VG_(memcpy)(&qbuf[qidx], string, qlen); + qidx += qlen; + if (contdebug) { + qbuf[qidx] = '\0'; + VG_(printf)("working buf=\"%s\"\n", qbuf); + } + + i++; + if (i >= n_stab_entries) + break; + + if (stab[i].n_un.n_strx) { + string = stabstr + stab[i].n_un.n_strx; + qlen = VG_(strlen)(string); + } else { + string = NULL; + qlen = 0; + } + } + + if (qbuf != NULL) { + i--; /* overstepped */ + string = ML_(addStr)(di, qbuf, qidx); + ML_(dinfo_free)(qbuf); + if (contdebug) + VG_(printf)("made composite: \"%s\"\n", string); + } + } + + switch(st->n_type) { + case N_UNDEF: + /* new string table base */ + if (next_stabstr != NULL) { + stabstr_sz -= next_stabstr - stabstr; + stabstr = next_stabstr; + if (stabstr_sz <= 0) { + VG_(printf)(" @@ bad stabstr size %d\n", stabstr_sz); + return; + } + } + next_stabstr = stabstr + st->n_value; + break; + + case N_BINCL: { + break; + } + + case N_EINCL: + break; + + case N_EXCL: + break; + + case N_SOL: /* sub-source (include) file */ + if (line.ovf != 0) + VG_(message)(Vg_UserMsg, + "Warning: file %s is very big (> 65535 lines) " + "Line numbers and annotation for this file might " + "be wrong. 
Sorry", + file.name); + /* FALLTHROUGH */ + + case N_SO: { /* new source file */ + UChar *nm = string; + UInt len = VG_(strlen)(nm); + Addr addr = func.start + st->n_value; + + if (line.addr != 0) { + /* finish off previous line */ + ML_(addLineInfo)(di, file.name, NULL, line.addr, + addr, line.no + line.ovf * LINENO_OVERFLOW, i); + } + + /* reset line state */ + line.ovf = 0; + line.addr = 0; + line.prev = 0; + line.no = 0; + + if (len > 0 && nm[len-1] != '/') { + file.name = ML_(addStr)(di, nm, -1); + if (debug) + VG_(printf)("new source: %s\n", file.name); + } else if (len == 0) + file.name = ML_(addStr)(di, "?1\0", -1); + + break; + } + + case N_SLINE: { /* line info */ + Addr addr = func.start + st->n_value; + + if (line.addr != 0) { + /* there was a previous */ + ML_(addLineInfo)(di, file.name, NULL, line.addr, + addr, line.no + line.ovf * LINENO_OVERFLOW, i); + } + + line.addr = addr; + line.prev = line.no; + line.no = (Int)((UShort)st->n_desc); + + if (line.prev > line.no + OVERFLOW_DIFFERENCE && file.same) { + VG_(message)(Vg_DebugMsg, + "Line number overflow detected (%d --> %d) in %s", + line.prev, line.no, file.name); + line.ovf++; + } + file.same = True; + + /* This is pretty horrible. If this is the first line of + the function, then bind any unbound symbols to the arg + scope, since they're probably arguments. */ + if (line.first) { + line.first = False; + + /* remember first line of function */ + if (func.start != 0) { + func.line = line.no; + } + } + break; + } + + case N_FUN: { /* function start/end */ + Addr addr = 0; /* end address for prev line/scope */ + Bool newfunc = False; + + /* if this the end of the function or we haven't + previously finished the previous function... 
*/ + if (*string == '\0' || func.start != 0) { + /* end of function */ + newfunc = False; + line.first = False; + + /* end line at end of function */ + addr = func.start + st->n_value; + + /* now between functions */ + func.start = 0; + + // XXXX DEAD POINT XXXX + } + + if (*string != '\0') { + /* new function */ + newfunc = True; + line.first = True; + + /* line ends at start of next function */ + addr = di->text_debug_bias + st->n_value; + + func.start = addr; + } + + if (line.addr) { + ML_(addLineInfo)(di, file.name, NULL, line.addr, + addr, line.no + line.ovf * LINENO_OVERFLOW, i); + line.addr = 0; + } + + //DEAD POINT + //DEAD POINT + break; + } + + case N_LBRAC: { + /* open new scope */ + // DEAD POINT + break; + } + + case N_RBRAC: { + /* close scope */ + // DEAD POINT + break; + } + + case N_GSYM: /* global variable */ + case N_STSYM: /* static in data segment */ + case N_LCSYM: /* static in bss segment */ + case N_PSYM: /* function parameter */ + case N_LSYM: /* stack variable */ + case N_RSYM: /* register variable */ + break; + } + } +} + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base new file mode 100644 index 0000000..4435d7c --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base @@ -0,0 +1,2486 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read XCOFF debug info. readxcoff.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. 
+ + Copyright (C) 2006-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +/* This file reads XCOFF symbol tables and debug info. + Known limitations: + + * only one text section per object file is handled + + * C_BINCL/C_EINCL handling is wrong, so functions defined in files + included from other files will end up with the wrong file name + and possibly line numbers. Fixable. + + * The line number reader leans heavily on the fact that the generic + line number canonicaliser in storage.c truncates overlapping + ranges. 
+*/ + +#include "pub_core_basics.h" +#include "pub_core_vki.h" /* struct vki_stat et al */ +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcfile.h" /* stat, open, close */ +#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ +#include "pub_core_options.h" /* VG_(clo_trace_symtab) */ +#include "pub_core_xarray.h" +#include "priv_misc.h" +#include "priv_tytypes.h" +#include "pub_tool_debuginfo.h" +#include "priv_d3basics.h" +#include "priv_storage.h" +#include "priv_readxcoff.h" /* self */ + +/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ +#if defined(VGP_ppc32_aix5) +# define __XCOFF32__ 1 +# undef __XCOFF64__ +#elif defined(VGP_ppc64_aix5) +# define __XCOFF64__ 1 +# undef __XCOFF32__ +#else +# error "This file should only be compiled on AIX" +#endif +#include <xcoff.h> + +#undef __AR_SMALL__ +#define __AR_BIG__ 1 +#include <ar.h> +/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ + +/* Debug stuff */ +#define SHOW_LD_STRTAB 1 /* loader string tables */ +#define SHOW_LD_SYMTAB 1 /* loader symbol table */ +#define SHOW_LD_RELTAB 1 /* loader reloc table */ +#define SHOW_STRTAB 1 /* main string table */ +#define SHOW_SYMS_P1 1 /* P1: find text sym starts */ +#define SHOW_SYMS_P2 1 /* P2: find text sym ends */ +#define SHOW_SYMS_P3 1 /* P3: src filenames & fn start/end line #s */ +#define SHOW_SYMS_P4 1 /* P4: line numbers */ +#define SHOW_SYMS_P5 1 /* P5: find TOC pointers */ +#define SHOW_SYMS_P6 1 /* P6: finalise symbol info */ + +#define SHOW_AR_DETAILS 0 /* show details of .a file internals */ + +#define SHOW di->trace_symtab + +/* A small stack of filenames is maintained for dealing + with BINCL/EINCL symbol table entries. 
*/ + +#define N_FILENAME_STACK 16 + +/* Phase 5 (find TOC pointers) has two implementations, the official + version, which involves reading the data segment symbols, and the + kludgey version, which basically scans the (actual loaded) data + segment to find structs which look like function descriptors. */ + +#if 1 +# undef OFFICIAL_PHASE5 +#else +# define OFFICIAL_PHASE5 1 +#endif + +/*------------------------------------------------------------*/ +/*--- Read XCOFF format debug info. ---*/ +/*------------------------------------------------------------*/ + + +/* COFF uses a strange way to represent symbol names. A symbol is an + eight-byte field. + + In 32-bit mode: if the first four bytes are zero, then the second + four bytes give the offset into the string table where the string + really is. Otherwise, the whole 8-byte thing is itself the name. + + In 64-bit mode: a four-byte field at offset 8 is always interpreted + as an offset into the string table. + + For a symbol of length 8, in 32-bit mode, there is no obvious way + to zero-terminate it. One solution is to copy the name into + dynamically allocated memory, but that complicates storage + management. + + An alternative solution, used here, is to represent a name as a + (data, length) pair instead of the traditional zero-terminated + string. Such a pair can be constructed for any XCOFF symbol name, + and has the advantages that (1) no dynamic memory is required, and + (2) the name is guaranteed to be accessible as long as the object + image is mapped in. + + What the .vec points at must not be modified; if you want to do + that, copy it elsewhere first. 
+*/ + +typedef + struct { + UChar* vec; /* the text of the name */ + UInt len; /* length of the text */ + } + Name; + +static Name maybeDerefStrTab( SYMENT* sym, + UChar* oi_strtab, UWord oi_n_strtab) +{ + Name res; + static UChar* bogus + = (UChar*)"**_Error_Dereferencing_COFF_String_Table_**"; + UChar* bytes = (UChar*)sym; + +# if defined(VGP_ppc32_aix5) + if (bytes[0]==0 && bytes[1]==0 && bytes[2]==0 && bytes[3]==0) { + UInt off = *(UInt*)&bytes[4]; + if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) { + res.vec = &oi_strtab[off]; + res.len = VG_(strlen)(res.vec); + return res; + } else + goto bad; + } else { + Int i; + res.vec = bytes; + res.len = 8; + for (i = 0; i < 8; i++) + if (bytes[i] == 0) + res.len--; + return res; + } + +# elif defined(VGP_ppc64_aix5) + ULong off = (ULong)( *(UInt*)&bytes[8] ); + if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) { + res.vec = &oi_strtab[off]; + res.len = VG_(strlen)(res.vec); + return res; + } else + goto bad; + +# else +# error "Unknown platform" +# endif + + bad: + res.vec = bogus; + res.len = VG_(strlen)(bogus); + return res; +} + + +/* Similar scheme for extracting names from C_FILE auxiliary entries, + except that the 32-bit scheme appears to be always used, even for + XCOFF64. 
*/ + +static Name maybeDerefStrTab_fname ( UChar* bytes, + UChar* oi_strtab, UWord oi_n_strtab) +{ + Name res; + static UChar* bogus + = (UChar*)"**_Error_Dereferencing_COFF_String_Table_**"; + + if (bytes[0]==0 && bytes[1]==0 && bytes[2]==0 && bytes[3]==0) { + UInt off = *(UInt*)&bytes[4]; + if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) { + res.vec = &oi_strtab[off]; + res.len = VG_(strlen)(res.vec); + return res; + } else + goto bad; + } else { + Int i; + res.vec = bytes; + res.len = 8; + for (i = 0; i < 8; i++) + if (bytes[i] == 0) + res.len--; + return res; + } + + bad: + res.vec = bogus; + res.len = VG_(strlen)(bogus); + return res; +} + + +static Name mk_const_Name ( HChar* str ) +{ + Name res; + res.vec = str; + res.len = VG_(strlen)(res.vec); + return res; +} + +static Name mk_empty_Name ( void ) +{ + Name res; + res.vec = ""; + res.len = 0; + return res; +} + +static Bool is_empty_Name ( Name name ) +{ + return name.len == 0; +} + +static Bool eq_string_Name ( Name name, UChar* str ) +{ + UInt i; + for (i = 0; i < name.len; i++) { + if (str[i] == 0) + return False; + if (str[i] != name.vec[i]) + return False; + } + if (str[name.len] == 0) + return True; + else + return False; +} + +static Int cmp_Names ( Name n1, Name n2 ) +{ + UInt i = 0; + while (1) { + vg_assert(i >= 0 && i <= n1.len); + vg_assert(i >= 0 && i <= n2.len); + if (i == n1.len && i == n2.len) + return 0; + if (i == n1.len && i < n2.len) + return -1; + if (i < n1.len && i == n2.len) + return 1; + if (n1.vec[i] < n2.vec[i]) + return -1; + if (n1.vec[i] > n2.vec[i]) + return 1; + i++; + } +} + +static void print_Name ( Name name ) +{ + UInt i; + for (i = 0; i < name.len; i++) + VG_(printf)("%c", name.vec[i]); +} + + +static UChar sanitiseChar ( UChar c ) +{ + if (c < 32 || c > 127) + c = '?'; + return c; +} + +static HChar* name_of_filhdr_f_magic ( Int magic ) +{ + switch (magic) { + case 0x01DF: return "xcoff32"; + case 0x01EF: return "xcoff64-upto-aix43"; + case 0x01F7: return 
"xcoff64-from-aix51"; + default: return "unknown-xcoff-header-magic"; + } +} + +static HChar* name_of_scnhdr_s_flags ( Int flags ) +{ + switch (flags & 0xFFFF) { + case STYP_REG: return "\"regular\""; + case STYP_PAD: return "\"padding\""; + case STYP_TEXT: return "text only"; + case STYP_DATA: return "data only"; + case STYP_BSS: return "bss only"; + case STYP_EXCEPT: return "Exception"; + case STYP_INFO: return "Comment"; + case STYP_LOADER: return "Loader"; + case STYP_DEBUG: return "Debug"; + case STYP_TYPCHK: return "Typecheck"; + case STYP_OVRFLO: return "Overflow"; + default: return "unknown-section-header-name"; + } +} + +static HChar* name_of_syment_n_sclass ( Int sclass ) +{ + static HChar buf[10]; + switch (sclass) { + /* dbx ones (>= 0x80) */ + case C_GSYM: return "gsym"; + case C_LSYM: return "lsym"; + case C_PSYM: return "psym"; + case C_RSYM: return "rsym"; + case C_RPSYM: return "rpsym"; + case C_STSYM: return "stsym"; + case C_DECL: return "decl"; + case C_FUN: return "fun"; + case C_BSTAT: return "bstat"; + case C_ESTAT: return "estat"; + /* non-dbx ones (< 0x80) */ + case C_STAT: return "STAT"; + case C_FILE: return "FILE"; + case C_HIDEXT: return "HIDEXT"; + case C_EXT: return "EXT"; + case C_FCN: return "FCN"; + case C_BINCL: return "BINCL"; + case C_EINCL: return "EINCL"; + case C_BLOCK: return "BLOCK"; + case C_WEAKEXT: return "WEAKEXT"; + default: + VG_(sprintf)(buf, "??%d??", sclass); + return buf; + } +} + +typedef + struct { + Name name; /* symbol's name */ + Addr first; /* first address; always known */ + Addr last; /* last address; may be an overestimate */ + + Name fname; /* source file name, if known */ + Int slnno; /* starting line #, or 0 if unknown */ + Int elnno; /* ending line #, or 0 if unknown */ + + UWord r2value; /* what r2 should be for this fn (tocptr) */ + Bool r2known; /* do we have a r2 value? 
*/ + } + XCoffSym; + +static void init_XCoffSym( XCoffSym* sym ) +{ + sym->name = mk_empty_Name(); + sym->first = 0; + sym->last = 0; + sym->fname = mk_empty_Name(); + sym->slnno = 0; + sym->elnno = 0; + sym->r2known = False; + sym->r2value = False; +} + +/* Compare XCoffSyms by their start address. */ +static Int cmp_XCoffSym_by_start ( void* v1, void* v2 ) +{ + XCoffSym* s1 = (XCoffSym*)v1; + XCoffSym* s2 = (XCoffSym*)v2; + if (s1->first < s2->first) return -1; + if (s1->first > s2->first) return 1; + return 0; +} + +/* Compare XCoffSyms by a slightly weaker ordering, returning zero + (equivalence) for any overlap, and -1 or 1 otherwise. */ +static Int cmp_XCoffSym_by_overlap ( void* v1, void* v2 ) +{ + XCoffSym* s1 = (XCoffSym*)v1; + XCoffSym* s2 = (XCoffSym*)v2; + if (s1->last < s2->first) return -1; + if (s2->last < s1->first) return 1; + return 0; +} + +/* Compare XCoffSyms by their start address, and for equal addresses, + use the name as a secondary sort key. */ +static Int cmp_XCoffSym_by_start_then_name ( void* v1, void* v2 ) +{ + XCoffSym* s1 = (XCoffSym*)v1; + XCoffSym* s2 = (XCoffSym*)v2; + if (s1->first < s2->first) return -1; + if (s1->first > s2->first) return 1; + return cmp_Names(s1->name, s2->name); +} + + +/* csect_idx is an index in the symbol table (start, n_entries) to a + symbol defining a csect. If possible, find the bounds of the csect + and assign them to *first and *last, and return True; else return + False. sntext_1based_if_known is the 1-based number of the text + section. Note: computes stated VMAs, not actual VMAs. 
*/ + +#if defined(VGP_ppc32_aix5) +# define SMTYP_SMTYP(x) ((x) & 0x7) /* symbol type */ +# define CSECT(PP) (((AUXENT*)(PP))->x_csect) +# define CSECT_LEN(PP) (CSECT(PP).x_scnlen) +# define CSECT_ALIGN(PP) (SMTYP_ALIGN(CSECT(PP).x_smtyp)) +# define CSECT_SMTYP(PP) (SMTYP_SMTYP(CSECT(PP).x_smtyp)) +# define CSECT_SCLAS(PP) (CSECT(PP).x_smclas) + +#elif defined(VGP_ppc64_aix5) +# define SMTYP_SMTYP(x) ((x) & 0x7) /* symbol type */ +# define CSECT(PP) (((AUXENT*)(PP))->x_csect) +# define CSECT_LEN(PP) ((((ULong)(CSECT(PP).x_scnlen_hi)) << 32) \ + | ((ULong)(CSECT(PP).x_scnlen_lo))) +# define CSECT_ALIGN(PP) (SMTYP_ALIGN(CSECT(PP).x_smtyp)) +# define CSECT_SMTYP(PP) (SMTYP_SMTYP(CSECT(PP).x_smtyp)) +# define CSECT_SCLAS(PP) (CSECT(PP).x_smclas) + +#else +# error "Unknown platform" + +#endif + + +#define SYM_IX(_tab,_n) ((SYMENT*)(((UChar*)(_tab)) + SYMESZ * (_n))) + +static +Bool get_csect_bounds ( UChar* start, UWord n_entries, + UWord csect_idx, + Int sntext_1based_if_known, + /*OUT*/UChar** first, /*OUT*/UChar** last ) +{ + Bool is_text; + SYMENT* cssym; + AUXENT* csaux; + + vg_assert(SYMESZ == 18); /* both for XCOFF32 and XCOFF64 */ + + if (n_entries < 2) + return False; + if (csect_idx+1 >= n_entries) + return False; + cssym = (SYMENT*)SYM_IX(start, csect_idx); + csaux = (AUXENT*)SYM_IX(start, csect_idx+1); + is_text = sntext_1based_if_known != -1 + && (Int)cssym->n_scnum == sntext_1based_if_known; + + if (!is_text) + return False; + + if (cssym->n_sclass == C_EXT || cssym->n_sclass == C_HIDEXT) { + if (cssym->n_numaux == 1) { + if (CSECT_SMTYP(csaux) == XTY_SD) { + if (0) VG_(printf)("GCB: SD: len is %lld\n", (Long)CSECT_LEN(csaux)); + *first = (UChar*)(cssym->n_value); + *last = *first + CSECT_LEN(csaux)-1; + return True; + } + } else { + /* Possibly complain or take evasive action here. In fact + I've yet to see a case where a csect definition symbol has + n_numaux != 1. 
*/ + } + } + return False; +} + +/* Read symbol and line number info for the given text section. (This + is the central routine for XCOFF reading.) Returns NULL on + success, or the text of an error message otherwise. */ +static +HChar* read_symbol_table ( + /*MOD*/struct _DebugInfo* di, + + /* location of symbol table */ + UChar* oi_symtab, UWord oi_nent_symtab, + + /* location of string table */ + UChar* oi_strtab, UWord oi_n_strtab, + + /* location of debug section (stabs strings, if any) */ + UChar* oi_debug, UWord oi_n_debug, + + /* location of line number info, if any */ + UChar* oi_lnos, UWord oi_nent_lnos, + + /* section indices */ + Int sntext_1based_if_known, + Int sndata_1based_if_known, + + /* where the mapped data section is */ + /* Now in di->data_avma: Addr data_avma, */ + /* Now in di->data_size: UWord data_alen, */ + UWord data_alen_from_auxhdr, + + /* where the mapped toc is (in the data section, + presumably), if known */ + Addr toc_avma, + + /* stated-to-actual VMA offsets */ + Word text_bias, + Word data_bias + ) +{ + SYMENT* sym; + SYMENT* aux; + UInt i, j, nsyms, k, m; + Name name; + Bool is_text, is_data; + XArray* syms = NULL; /* XArray of XCoffSyms */ + + /* If the TOC avma is obviously bogus, get rid of it */ + { + UWord data_maxlen = di->data_size; + if (data_maxlen < data_alen_from_auxhdr) + data_maxlen = data_alen_from_auxhdr; + + //VG_(printf)(" toc_avma %p\n", toc_avma); + //VG_(printf)("data_avma %p\n", data_avma); + //VG_(printf)("dxxx_avma %p\n", data_avma + data_maxlen); + + if (toc_avma != 0 + && (toc_avma < di->data_avma + || toc_avma >= di->data_avma + data_maxlen)) + toc_avma = 0; + //VG_(printf)("2toc_avma %p\n", toc_avma); + } + + /* We can't just treat this as an array of SYMENTs, because C + thinks they have size 20 whereas the spec says they have size 18 + (alignment padding) so doing the obvious thing screws up. Hence + we have to calculate the offset of each entry manually. 
*/ + + if (0) VG_(printf)("size of SYMENT = %ld\n", sizeof(SYMENT)); + + /* ---------------------------------------------------------- + Phase 1: first make a pass through the symbols, looking for + stuff in the text segment. Calculate their actual VMAs, + dump any outside the text segment actual VMA bounds, and + add the rest to 'syms'. + ---------------------------------------------------------- */ + + syms = VG_(newXA)( ML_(dinfo_zalloc), "di.readxcoff.rst.1", + ML_(dinfo_free), sizeof(XCoffSym) ); + + if (SHOW && SHOW_SYMS_P1) { + VG_(printf)("--- BEGIN Phase1 (find text symbol starts) ---\n"); + VG_(printf)("--- note: shown addresses are STATED VMAs ---\n"); + } + + i = 0; + while (1) { + + if (i >= oi_nent_symtab) + break; + + sym = SYM_IX(oi_symtab, i); + is_text = sntext_1based_if_known != -1 + && (Int)sym->n_scnum == sntext_1based_if_known; + is_data = sndata_1based_if_known != -1 + && (Int)sym->n_scnum == sndata_1based_if_known; + + if (SHOW && SHOW_SYMS_P1) + VG_(printf)("Phase1: %5d+%d ", i, (Int)sym->n_numaux); + + name = mk_const_Name("(unknown)"); + if (sym->n_scnum == N_DEBUG && sym->n_sclass == C_FUN) + name = maybeDerefStrTab( sym, oi_debug, oi_n_debug ); + else + if (sym->n_sclass & DBXMASK) + name = mk_const_Name("(dbxstr)"); + else + name = maybeDerefStrTab( sym, oi_strtab, oi_n_strtab); + + if (SHOW && SHOW_SYMS_P1) { + VG_(printf)("%5s(%2d) %6s 0x%016llx ", + is_text ? "text" : is_data ? "data" : "other", + (Int)sym->n_scnum, + name_of_syment_n_sclass(sym->n_sclass), + (ULong)sym->n_value); + print_Name(name); + VG_(printf)("\n"); + } + + i++; + i += sym->n_numaux; + + if (!is_text) + continue; + + /* --- BEGIN regular(ish) symbol --- */ + if ((sym->n_sclass == C_EXT || sym->n_sclass == C_HIDEXT) + && (sym->n_numaux == 1 || sym->n_numaux == 2)) { + /* Dealing with a symbol with a csect entry. 
By convention + (according to IBM docs) the csect entry is the last + auxiliary for this symbol, if there is more than one + auxiliary present; hence "SYM_IX(oi_symtab, i-1)" below. */ + + aux = SYM_IX(oi_symtab, i-1); + if (0) VG_(printf)("symtype is %d\n", CSECT_SMTYP(aux)); + + if (CSECT_SMTYP(aux) == XTY_SD) { + /* Aux is a csect definition. This is relatively rare, + but at least it is simple: the CSECT_LEN(aux) field + contains it's length, so we just heave that into the + pot for phase 2. */ + XCoffSym cand; + if (0) VG_(printf)("SD: len is %d\n", (Int)CSECT_LEN(aux)); + if (0) VG_(printf)("SD: proposed %#llx\n", (ULong)sym->n_value); + init_XCoffSym(&cand); + cand.first = sym->n_value; + cand.last = cand.first + (UWord)CSECT_LEN(aux) - 1; + + cand.first += text_bias; + cand.last += text_bias; + cand.name = name; + + if (cand.last < di->text_avma + || cand.first >= di->text_avma + di->text_size) + continue; + if (cand.last < cand.first) + continue; + if (is_empty_Name(name)) + continue; + (void)VG_(addToXA)(syms, &cand); + } + + if (CSECT_SMTYP(aux) == XTY_LD) { + /* Aux is a label definition. This is the common case. */ + XCoffSym cand; + Bool ok; + UChar *csect_first, *csect_last; + /* x_scnlen contains the symbol table entry of the + containing csect. Use the symbol's stated vma and csect + end as the initial approximation of this symbol's start + and length. The length will get revised downwards in + Phase 2. */ + init_XCoffSym(&cand); + ok = get_csect_bounds( oi_symtab, oi_nent_symtab, + CSECT_LEN(aux), + sntext_1based_if_known, + &csect_first, &csect_last ); + if (0 && ok) + VG_(printf)("new csect svma %p %p\n", csect_first, csect_last); + if (ok && ((UWord)csect_first) <= ((UWord)sym->n_value) + && ((UWord)sym->n_value) <= ((UWord)csect_last)) { + if (0) { + VG_(printf)("LD: in a csect %p %p\n", + csect_first, csect_last); + VG_(printf)("CAND: %p .. 
%p %s\n", + (void*)sym->n_value, (void*)csect_last, + "fixme-Name-printing(1)" /*name*/); + } + cand.first = sym->n_value; + cand.last = (Addr)csect_last; + } else { + if (0) { + VG_(printf)("LD: can't compute csect bounds?!\n"); + VG_(printf)("CAND: %p .. %p %s\n", + (HChar*)sym->n_value, + (HChar*)sym->n_value+1, + "fixme-Name-printing(2)" /*name*/); + } + cand.first = sym->n_value; + cand.last = cand.first + 1; + } + + /* cand.first is a stated VMA; turn it into an actual VMA + and ignore it if not in the actual text segment. */ + + cand.first += text_bias; + cand.last += text_bias; + cand.name = name; + + if (cand.last < di->text_avma + || cand.first >= di->text_avma + di->text_size) + continue; + if (cand.last < cand.first) + continue; + if (is_empty_Name(name)) + continue; + + (void)VG_(addToXA)(syms, &cand); + } + } + /* --- END regular(ish) symbol --- */ + + } + + /* ---------------------------------------------------------- + Phase 2: suitable text symbols have been put into 'syms'. Their + start addresses are correct, but end addresses are those of the + containing csect, which is in general way too long. This phase + clips the ends so that the ranges no longer overlap, and thereby + constrains each symbol's range to something which, for the most + part, is correct. + ---------------------------------------------------------- */ + + nsyms = VG_(sizeXA)(syms); + + if (SHOW && SHOW_SYMS_P1) + VG_(printf)("Phase1 acquired %d text symbols\n", nsyms); + + if (SHOW && SHOW_SYMS_P2) { + VG_(printf)("--- BEGIN Phase2 (find text symbol ends) ---\n"); + VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n"); + } + + VG_(setCmpFnXA)(syms, cmp_XCoffSym_by_start_then_name); + VG_(sortXA)(syms); + + /* We only know for sure the start addresses (actual VMAs) of + symbols, and an overestimation of their end addresses. So sort + by start address, then clip each symbol so that its end address + does not overlap with the next one along. 
+ + There is a small refinement: if a group of symbols have the same + address, treat them as a group: find the next symbol along that + has a higher start address, and clip all of the group + accordingly. This clips the group as a whole so as not to + overlap following symbols. This leaves prefersym() in + storage.c, which is not XCOFF-specific, to later decide which of + the symbols in the group to keep. + + Another refinement is that we need to get rid of symbols which, + after clipping, have identical starts, ends, and names. So the + sorting uses the name as a secondary key. + */ + + for (i = 0; i < nsyms; i++) { + for (k = i+1; + k < nsyms + && ((XCoffSym*)VG_(indexXA)(syms,i))->first + == ((XCoffSym*)VG_(indexXA)(syms,k))->first; + k++) + ; + /* So now [i .. k-1] is a group all with the same start address. + Clip their ending addresses so they don't overlap [k]. In + the normal case (no overlaps), k == i+1. */ + if (k < nsyms) { + XCoffSym* next = (XCoffSym*)VG_(indexXA)(syms,k); + for (m = i; m < k; m++) { + XCoffSym* here = (XCoffSym*)VG_(indexXA)(syms,m); + vg_assert(here->first < next->first); + if (here->last >= next->first) + here->last = next->first-1; + } + } + i = k-1; + vg_assert(i <= nsyms); + } + + j = 0; + if (nsyms > 0) { + j = 1; + for (i = 1; i < nsyms; i++) { + vg_assert(j <= i); + XCoffSym* s_j1 = (XCoffSym*)VG_(indexXA)(syms, j-1); + XCoffSym* s_j = (XCoffSym*)VG_(indexXA)(syms, j); + XCoffSym* s_i = (XCoffSym*)VG_(indexXA)(syms, i); + if (s_i->first != s_j1->first + || s_i->last != s_j1->last + || 0 != cmp_Names(s_i->name, s_j1->name)) { + *s_j = *s_i; + j++; + } else { + if (SHOW && SHOW_SYMS_P2) { + VG_(printf)("Phase2: dump duplicate "); + print_Name(s_i->name); + VG_(printf)("\n"); + } + } + } + } + vg_assert(j >= 0 && j <= nsyms); + VG_(dropTailXA)(syms, nsyms - j); + nsyms = j; + + if (1) { + for (i = 0; i < nsyms; i++) { + XCoffSym* s = (XCoffSym*)VG_(indexXA)(syms, i); + if (SHOW && SHOW_SYMS_P2) { + VG_(printf)("Phase2: %d 0x%lx 
0x%lx ", + i, s->first, s->last); + print_Name(s->name); + VG_(printf)("\n"); + } + } + } + + /* ---------------------------------------------------------- + Phase 3: rescan the symbol table, looking for info on function + start/end line numbers and source file names. Generally + this will be absent for sources compiled without -g. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("--- BEGIN Phase3 (find src filenames " + "& fn start/end line #s) ---\n"); + VG_(printf)("--- note: shown addresses are STATED VMAs ---\n"); + } + + /* The lookupXAs in the C_FUN(.bf) part have to operate by + inclusion. Hence: */ + VG_(setCmpFnXA)(syms, cmp_XCoffSym_by_overlap); + VG_(sortXA)(syms); + + /* In this loop, p3currsym is maintained as a pointer to the most + recent XCoffSym identified as FCN(.bf) (function start). + Subsequent FCN(.ef) (function end) indications are compared + against said symbol. This assumes that function start/end + indications are not nested. */ + + XCoffSym* p3currsym = NULL; + + /* Maintain a stack of filenames. We allow the stack pointer to go + beyond the end, but obviously nothing is stored in this + imaginary part of the stack. */ + Name filenames[N_FILENAME_STACK]; + Int filenames_used = 1; + + Name name_unknown = mk_empty_Name(); + Name name_overflow = mk_const_Name("(filename_stack_overflow)"); + + for (i = 0; i < N_FILENAME_STACK; i++) + filenames[i] = name_unknown; + +# define FNAME_PUSH(_fname) \ + do { \ + vg_assert(filenames_used >= 1);\ + if (filenames_used < N_FILENAME_STACK)\ + filenames[filenames_used] = (_fname);\ + filenames_used++;\ + } while (0) + +# define FNAME_POP \ + do {\ + vg_assert(filenames_used >= 1);\ + if (filenames_used > 1 && filenames_used <= N_FILENAME_STACK) \ + filenames[filenames_used-1] = name_unknown; \ + if (filenames_used > 1)\ + filenames_used--;\ + } while (0) + +# define FNAME_GET_TOP \ + (filenames_used > N_FILENAME_STACK \ + ? 
name_overflow \ + : filenames[filenames_used-1]) + +# define FNAME_SET_TOP(_fname) \ + do {\ + vg_assert(filenames_used >= 1);\ + filenames[filenames_used-1] = (_fname);\ + } while (0) + + + i = 0; + while (1) { + + if (i >= oi_nent_symtab) + break; + + sym = SYM_IX(oi_symtab, i); + is_text = sntext_1based_if_known != -1 + && (Int)sym->n_scnum == sntext_1based_if_known; + is_data = sndata_1based_if_known != -1 + && (Int)sym->n_scnum == sndata_1based_if_known; + + if (0 && SHOW && SHOW_SYMS_P3) + VG_(printf)("Phase3: %5d+%d ", i, (Int)sym->n_numaux); + + name = mk_const_Name("(unknown)"); + if (sym->n_scnum == N_DEBUG && sym->n_sclass == C_FUN) + name = maybeDerefStrTab( sym, oi_debug, oi_n_debug ); + else + if (sym->n_sclass & DBXMASK) + name = mk_const_Name("(dbxstr)"); + else + name = maybeDerefStrTab( sym, oi_strtab, oi_n_strtab); + + if (0 && SHOW && SHOW_SYMS_P3) { + VG_(printf)("%5s(%2d) %6s 0x%016llx ", + is_text ? "text" : is_data ? "data" : "other", + (Int)sym->n_scnum, + name_of_syment_n_sclass(sym->n_sclass), + (ULong)sym->n_value); + print_Name(name); + VG_(printf)("\n"); + } + + i++; + i += sym->n_numaux; + + /* --- BEGIN C_FILE [source file] --- */ + /* There are two variants of C_FILE: a simple one with n_numaux + == 0, where the primary name is what we're after, and another + variant with n_numaux == 3, in which we have to hunt around + in the auxiliary entries to find the file name. gcc produces + exclusively the first kind, and xlc a mixture of both. 
*/ + if (sym->n_sclass == C_FILE && sym->n_numaux == 0) { + if (!is_empty_Name(name)) + FNAME_SET_TOP(name); + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FILE ", + i-1-sym->n_numaux, (Int)sym->n_numaux ); + print_Name(name); + VG_(printf)("\n"); + } + continue; + } + if (sym->n_sclass == C_FILE && sym->n_numaux > 1 + && sym->n_numaux <= 5 /*stay sane*/) { + for (k = 0; k < sym->n_numaux; k++) { + aux = SYM_IX(oi_symtab, i - sym->n_numaux + k); + Name fname + = maybeDerefStrTab_fname( + (UChar*)&((AUXENT*)aux)->x_file.x_fname, + oi_strtab, oi_n_strtab); + if (((AUXENT*)aux)->x_file._x.x_ftype == XFT_FN) { + if (!is_empty_Name(fname)) + FNAME_SET_TOP(fname); + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FILE ", + i-1-sym->n_numaux, (Int)sym->n_numaux ); + print_Name(fname); + VG_(printf)("\n"); + } + break; + } + } + continue; + } + /* --- END C_FILE [source file] --- */ + + /* --- BEGIN C_BINCL [beginning of include] --- */ + if (sym->n_sclass == C_BINCL && sym->n_numaux == 0) { + FNAME_PUSH(name); + if (SHOW && SHOW_SYMS_P3) + VG_(printf)("Phase3: %5d+%d BINCL %s\n", + i-1-sym->n_numaux, (Int)sym->n_numaux, + "fixme-Name-printing(3)" /*name*/ ); + continue; + } + /* --- END C_BINCL [beginning of include] --- */ + + /* --- BEGIN C_EINCL [end of include] --- */ + if (sym->n_sclass == C_EINCL && sym->n_numaux == 0) { + FNAME_POP; + if (SHOW && SHOW_SYMS_P3) + VG_(printf)("Phase3: %5d+%d EINCL %s\n", + i-1-sym->n_numaux, (Int)sym->n_numaux, + "fixme-Name-printing(4)" /*name*/ ); + continue; + } + /* --- END C_EINCL [end of include] --- */ + + /* everything else that is interesting is in the text + section. 
*/ + if (!is_text) + continue; + + /* --- BEGIN C_FCN(.bf) [function begin mark] --- */ + if (sym->n_sclass == C_FCN + && sym->n_numaux == 1 + && eq_string_Name(name, ".bf")) { + /* aux is BLOCK */ + aux = SYM_IX(oi_symtab, i-1); + Addr fn_start_avma = ((Addr)sym->n_value) + text_bias; + Int fn_start_lnno = ((AUXENT*)aux)->x_sym.x_misc.x_lnsz.x_lnno; + /* Look in 'syms' to see if we have anything for address + fn_avma. */ + XCoffSym key; + VG_(memset)(&key, 0, sizeof(key)); + key.first = fn_start_avma; + key.last = fn_start_avma; + Word ix_lo, ix_hi; + + /* Search for all symbols intersecting fn_start_avma. */ + Bool found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + /* All the 'syms' entries from ix_lo to ix_hi match. */ + + for (k = ix_lo; k <= ix_hi; k++) { + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,k); + + /* note the start line number */ + if (tsym->slnno == 0 && fn_start_lnno > 0) + tsym->slnno = fn_start_lnno; + + /* also the current filename, if we know it */ + if (is_empty_Name(tsym->fname) + && !is_empty_Name(FNAME_GET_TOP)) + tsym->fname = FNAME_GET_TOP; + + /* remember the first in the range as the new current + (I've never seen a range with > 1) */ + if (k == ix_lo) + p3currsym = tsym; + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FCN(.bf) 0x%016llx " + "lnno=%-4d ", + i-1-sym->n_numaux, (Int)sym->n_numaux, + (ULong)sym->n_value, + fn_start_lnno ); + print_Name(tsym->name); + VG_(printf)("\n"); + if (!is_empty_Name(tsym->fname)) { + VG_(printf)("Phase3: "); + print_Name(tsym->fname); + VG_(printf)("\n"); + } + } + } + } + continue; + } + /* --- END C_FCN(.bf) [function begin mark] --- */ + + /* --- BEGIN C_FCN(.ef) [function end mark] --- */ + if (sym->n_sclass == C_FCN + && sym->n_numaux == 1 + && eq_string_Name(name, ".ef")) { + /* aux is BLOCK */ + aux = SYM_IX(oi_symtab, i-1); + /* In this case the n_value field appears to give the address + of the first insn following the end of the function. + Hence the - 1. 
*/ + Addr fn_end_avma = ((Addr)sym->n_value) + text_bias - 1; + Int fn_end_lnno = ((AUXENT*)aux)->x_sym.x_misc.x_lnsz.x_lnno; + + if (p3currsym + && fn_end_avma >= p3currsym->first + && fn_end_avma <= p3currsym->last) { + if (p3currsym->elnno == 0 && fn_end_lnno > 0) + p3currsym->elnno = fn_end_lnno; + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FCN(.ef) 0x%016llx " + "lnno=%-4d ", + i-1-sym->n_numaux, (Int)sym->n_numaux, + (ULong)sym->n_value, + fn_end_lnno ); + print_Name(p3currsym->name); + VG_(printf)("\n"); + } + if (fn_end_avma < p3currsym->last) { + /* also take the opportunity to trim the symbol's + length to something less than established by the + initial estimation done by Phases 1 and 2. */ + if (0) VG_(printf)("trim end from %#lx to %#lx\n", + p3currsym->last, fn_end_avma); + p3currsym->last = fn_end_avma; + } + } + continue; + } + /* --- END C_FCN(.ef) [function end mark] --- */ + + } + + /* ---------------------------------------------------------- + Phase 4: read and enumerate the line number entries, if + there are any. This depends on knowing the function start/end + line numbers established in Phase 3. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P4) { + VG_(printf)("--- BEGIN Phase4 (read line number info) ---\n"); + VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n"); + } + + /* Re-sort 'syms' using the compare-start-addresses ordering, so we + can use that in subsequent searches. 
*/ + VG_(setCmpFnXA)(syms, cmp_XCoffSym_by_start); + VG_(sortXA)(syms); + + if (oi_lnos && oi_nent_lnos > 0) { + +# if defined(VGP_ppc32_aix5) + vg_assert(LINESZ == 6); /* XCOFF32 */ +# elif defined(VGP_ppc64_aix5) + vg_assert(LINESZ == 12); /* XCOFF64 */ +# else +# error "Unknown plat" +# endif + +# define LNO_IX(_tab,_n) \ + ((LINENO*)(((UChar*)(_tab)) + LINESZ * (_n))) + + /* Current fn that we are processing line numbers for */ + XCoffSym* p4currsym = NULL; + + /* SegInfo's string table pointer for p4currsym's file name. + Allocated on demand, so as not to waste space in the + SegInfo's string table. */ + UChar* si_fname_str = NULL; + + /* Ditto the directory name, if we can manage it. */ + UChar* si_dname_str = NULL; + + for (i = 0; i < oi_nent_lnos; i++) { + LINENO* lno = LNO_IX(oi_lnos,i); + + if (lno->l_lnno == 0) { + /* New fn. We get given the index in the symbol table of + the relevant function. It should be a C_EXT, C_WEAKEXT + or C_HIDEXT flavour, according to the IBM docs. */ + Int sym_ix = (Int)lno->l_addr.l_symndx; + sym = SYM_IX(oi_symtab, sym_ix); + if (!(sym->n_sclass == C_EXT + || sym->n_sclass == C_WEAKEXT + || sym->n_sclass == C_HIDEXT)) + return "readxcoff.c: invalid symbol reference" + " in line number info"; + /* For these 3 symbol kinds, the n_value field is the + symbol's stated VMA. Convert this to an actual VMA and + use that to find the associated XCoffSym. */ + Addr sym_avma = ((Addr)sym->n_value) + text_bias; + + XCoffSym key; + VG_(memset)(&key, 0, sizeof(key)); + key.first = sym_avma; + Word ix_lo, ix_hi; + + Bool found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + /* All the 'syms' entries from ix_lo to ix_hi match. + Just use the lowest (sigh ..) */ + p4currsym = (XCoffSym*)VG_(indexXA)(syms, ix_lo); + } else { + /* We can't find the relevant sym, but we still have to + wade through the line number info for this function + until we get to the starting record for the next + one. 
*/ + p4currsym = NULL; + } + + /* If we decide to add any line info for this fn to the + SegInfo, we'll allocate this. Otherwise don't + bother. */ + si_fname_str = NULL; + si_dname_str = NULL; + + if (SHOW && SHOW_SYMS_P4) { + VG_(printf)("Phase4: new fn (%d found), avma 0x%016llx ", + (Int)(ix_hi-ix_lo+1), + (ULong)sym_avma ); + if (p4currsym) + print_Name(p4currsym->name); + else + VG_(printf)("UNKNOWN"); + VG_(printf)("\n"); + } + + } else { + /* Line number entry for the current fn. */ + if (!p4currsym) + continue; + Int line_no = (Int)(UInt)lno->l_lnno; + line_no += (p4currsym->slnno - 1); + Addr line_first_avma = ((Addr)lno->l_addr.l_paddr) + text_bias; + if (line_first_avma < p4currsym->first + || line_first_avma > p4currsym->last) + continue; + Addr line_last_avma = p4currsym->last; + /* Try to refine the last_avma by looking at the next + line's entry. */ + + /* XXX: TODO. What we have currently works only because + the generic line number canonicaliser truncates + overlapping address ranges in the way which we happen + to need anyway. */ + if (SHOW && SHOW_SYMS_P4) + VG_(printf)("Phase4: line %d 0x%016llx - 0x%016llx\n", + line_no, (ULong)line_first_avma, + (ULong)line_last_avma); + + /* This now has to be allocated. Try and figure out the + dir name at the same time. This is a bit ugly in that + it involves messing with the string after it's been + copied into the SegInfo's string table, but seems + harmless enough. */ + if ((!si_fname_str) && !is_empty_Name(p4currsym->fname)) { + si_dname_str = NULL; + si_fname_str = ML_(addStr)(di, p4currsym->fname.vec, + p4currsym->fname.len); + UChar* lastslash = VG_(strrchr)(si_fname_str, '/'); + if (lastslash) + vg_assert(lastslash[0] == '/'); + if (lastslash[1] != 0) { + si_dname_str = si_fname_str; + lastslash[0] = 0; /* replace the / with a NUL + terminator */ + si_fname_str = lastslash+1; + if (0) VG_(printf)("XXX %s %s\n", si_dname_str, + si_fname_str); + } + } + /* finally .. 
*/ + if (line_no >= 0) + ML_(addLineInfo)(di, si_fname_str, si_dname_str, + line_first_avma, line_last_avma+1, + line_no, i/*debugging only*/); + } + } + +# undef LNO_IX + } + +#if defined(OFFICIAL_PHASE5) + /* ---------------------------------------------------------- + Phase 5: Do another trawl of the XCOFF symbol table, looking + for TOC entries for the entries we've already placed in 'syms'. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)("--- BEGIN official Phase5 (find TOC pointers) ---\n"); + + Bool is_cfun; + + i = 0; + while (1) { + + if (i >= oi_nent_symtab) + break; + + sym = SYM_IX(oi_symtab, i); + is_text = sntext_1based_if_known != -1 + && (Int)sym->n_scnum == sntext_1based_if_known; + is_data = sndata_1based_if_known != -1 + && (Int)sym->n_scnum == sndata_1based_if_known; + is_cfun = sym->n_scnum == N_DEBUG + && sym->n_sclass == C_FUN; + + i++; + i += sym->n_numaux; + + if (!is_cfun && !is_data) + continue; + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)("Phase5o: %5d+%d ", i-1-sym->n_numaux, + (Int)sym->n_numaux); + + name = mk_const_Name("(unknown)"); + if (is_cfun) + name = maybeDerefStrTab( sym, oi_debug, oi_n_debug ); + else + if (sym->n_sclass & DBXMASK) + name = mk_const_Name("(dbxstr)"); + else + name = maybeDerefStrTab( sym, oi_strtab, oi_n_strtab); + + if (SHOW && SHOW_SYMS_P5) { + VG_(printf)("%5s(%2d) %6s svma 0x%016llx ", + is_text ? "text" : is_data ? "data" : "other", + (Int)sym->n_scnum, + name_of_syment_n_sclass(sym->n_sclass), + (ULong)sym->n_value); + print_Name(name); + VG_(printf)("\n"); + } + + Addr avma = (Addr)sym->n_value + data_bias; + if (0) VG_(printf)("data sym: avma %p, limits %p-%p\n", + avma, data_avma,data_avma + data_alen); + + /* Does avma point to 3 valid words inside the actual data + segment? iow, can it possibly be a valid function + descriptor? If not, move on. */ + if (! 
(avma >= data_avma + && avma + 3 * sizeof(Word) <= data_avma + data_alen) ) + continue; + + UWord* fndescr = (UWord*)avma; + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)(" fndescr = {0x%lx,0x%lx}\n", + fndescr[0], fndescr[1]); + + /* Another check: fndescr[0], the entry point, must point inside + the actual text segment. Discard any that don't. */ + + Addr fndescr_0 = (Addr)fndescr[0]; + if (fndescr_0 < si->text_avma + || fndescr_0 >= si->text_avma+si->text_size) + continue; + + /* Let's suppose that fndescr is the descriptor for a + function with name NAME. If that's so, then 'syms' + acquired by stage 2 should have an entry of name '.NAME' + whose address is fndescr[0]. If so, then fndescr[1] must + be the relevant r2 value for it. */ + /* Look in 'syms' to see if we have anything for address + fndescr[0]. */ + XCoffSym key; + VG_(memset)(&key, 0, sizeof(key)); + key.first = fndescr_0; + Word ix_lo, ix_hi; + Bool found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + /* So all the 'syms' entries from ix_lo to ix_hi have an + address which matches the entry point address stated in + this descriptor. For each one, as a final sanity + check, see if the 'syms' entry has a name .NAME where + NAME is that of the data symbol currently under + consideration. If so, it's a pretty good bet that this + descriptor matches the text symbol we already have, and + so we have a valid tocptr value from fndescr[1]. */ + for (k = ix_lo; k <= ix_hi; k++) { + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,k); + vg_assert(!is_empty_Name(tsym->name)); + /* VG_(printf)("cmp %s %s\n", name, tsym->name); */ + /* VG_(printf)("found matching %d %s\n", k, tsym->name); */ + if (tsym->name.len == 1 + name.len + && tsym->name.vec[0] == '.' 
+ && 0 == VG_(memcmp)(&tsym->name.vec[1], + &name.vec[0], name.len)) { + Addr r2val = fndescr[1]; + if (tsym->r2known) { + if (tsym->r2value != r2val) + /* COMPLAIN - conflicting r2 values*/ ; + } else { + tsym->r2known = True; + tsym->r2value = r2val; + } + } + } + } + + } + +#else /* !defined(OFFICIAL_PHASE5) */ + /* ---------------------------------------------------------- + Alternative kludgey Phase 5: find TOC entries for 'syms' by the + blunt-instrument approach of scanning the actual data section + and noting anything that looks like a function descriptor. + This is dangerous in the sense that if there are any 3 word + structs which are not real function descriptors but just happen + to look like them, then those will be included too. + Seems unlikely though. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)("--- BEGIN kludged Phase5 (find TOC pointers) ---\n"); + + if (SHOW) + VG_(printf)("Phase5: actual data segment: %#lx %#lx\n", + di->data_avma, di->data_avma + di->data_size); + + /* Skip obviously-missing data sections. */ + if (di->data_avma != 0 && di->data_size >= sizeof(UWord)) { + + /* set up for inspecting all the aligned words in the actual + data section. 
*/ + + Addr tmp = di->data_avma; + while (tmp & (sizeof(UWord)-1)) + tmp++; + + UWord* first_data_word = (UWord*)tmp; + tmp = di->data_avma + di->data_size - sizeof(UWord); + while (tmp & (sizeof(UWord)-1)) + tmp--; + UWord* last_data_word = (UWord*)tmp; + + if (SHOW) + VG_(printf)("Phase5: data segment conservatively aligned %p %p\n", + first_data_word, last_data_word); + + UWord* wP = first_data_word; + UWord w; + + while (True) { + + XCoffSym key; + Word ix_lo, ix_hi; + Bool found; + + if (& wP[2] > last_data_word) + break; /* no space left for a 3-word descriptor */ + + w = wP[0]; + if (!(w >= di->text_avma + && w < di->text_avma + di->text_size)) { + wP++; + continue; /* entry pointer is not to text segment */ + } + + w = wP[1]; + if (!(w >= di->data_avma && w < di->data_avma + di->data_size)) { + wP++; + if (SHOW && SHOW_SYMS_P5) { + VG_(memset)(&key, 0, sizeof(key)); + key.first = wP[0]; + found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + vg_assert(ix_lo <= ix_hi); + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,ix_lo); + VG_(printf)("Phase5: bad tocptc at 0x%016llx={", + (ULong)(UWord)(wP-1)); + print_Name(tsym->name); + VG_(printf)(",%p}\n", (void*)w); + } + } + continue; /* r2 value does not point to data segment */ + } + + /* ok, so wP might be a valid fn descr. But does it point to + a text symbol we know about? Look in 'syms' to see if we + have anything for wP[0]. 
*/ + VG_(memset)(&key, 0, sizeof(key)); + key.first = wP[0]; + found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + for (k = ix_lo; k <= ix_hi; k++) { + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,k); + Addr r2val = wP[1]; + if (tsym->r2known) { + if (tsym->r2value != r2val) + /* COMPLAIN - conflicting r2 values*/ ; + } else { + tsym->r2known = True; + tsym->r2value = r2val; + if (SHOW && SHOW_SYMS_P5) { + VG_(printf)("Phase5: found tocptr 0x%016llx for ", + (ULong)r2val); + print_Name(tsym->name); + VG_(printf)("\n"); + } + } + } + } + + wP++; + } + } + +#endif /* defined(OFFICIAL_PHASE5) */ + + /* ---------------------------------------------------------- + Phase 6: trivial: copy the syms out of 'syms' into the + generic debuginfo tables, and free up 'syms'. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P6) { + VG_(printf)("--- BEGIN Phase6 (finalise symbol info) ---\n"); + VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n"); + } + + for (i = 0; i < nsyms; i++) { + DiSym dis; + XCoffSym* s = (XCoffSym*)VG_(indexXA)(syms, i); + Addr addr = s->first; + UWord size = s->last + 1 - s->first; + Bool guessed_toc = False; + + /* If everything worked right, the symbol should fall within the + mapped text segment. Hence .. */ + Bool sane = addr >= di->text_avma + && addr+size <= di->text_avma + di->text_size; + + if (SHOW && SHOW_SYMS_P6) { + VG_(printf)("Phase6: %s %3d 0x%08lx-0x%08lx 0x%08lx ", + sane ? " " : "BAD", + i, + addr, + addr + size - 1, + s->r2known ? s->r2value : 0 ); + print_Name(s->name); + VG_(printf)("\n"); + } + +# if defined(VGP_ppc64_aix5) + /* 64-bit kludge: if we can't find a plausible toc ptr just use + the one specified in the XCOFF auxiliary header. 
*/ + if ((!s->r2known) + && toc_avma != 0 + && s->name.len > 8 + && 0==VG_(strncmp)(&s->name.vec[0], "._vgwZU_", 8)) { + s->r2known = True; + s->r2value = toc_avma; + guessed_toc = True; + if (SHOW && SHOW_SYMS_P6) + VG_(printf)("Phase6: assuming toc 0x%08lx for above sym\n", + s->r2value); + } +# endif + + /* Actually add the symbol (finallyatlast) */ + if (sane) { + UInt nlen; + dis.addr = addr; + dis.size = size; + dis.tocptr = s->r2known ? s->r2value : 0; + dis.isText = True; + vg_assert(!is_empty_Name(s->name)); + nlen = s->name.len; + vg_assert(nlen > 0); + if (s->name.vec[0] == '.') + dis.name = ML_(addStr)(di, &s->name.vec[1], nlen-1 ); + else + dis.name = ML_(addStr)(di, &s->name.vec[0], nlen-0 ); + ML_(addSym)( di, &dis ); + if (0 && s->r2known) + VG_(printf)("r2 known for %s\n", + "fixme-Name-printing(5)" /*s->name*/ ); + + if (guessed_toc) + VG_(message)(Vg_DebugMsg, "WARNING: assuming toc 0x%lx for %s", + s->r2value, dis.name); + } + } + + /* Free up the XA */ + VG_(deleteXA)(syms); + +# undef SYM_IX + + return NULL; /*success*/ +} + +
/* How many of the LEN bytes claimed to start at offset OFF really lie
   inside a region that is SIZE bytes long?  Returns 0 if OFF itself
   is outside the region. */
static UWord loader_usable_bytes ( ULong off, ULong len, UWord size )
{
   ULong room;
   if (off >= (ULong)size)
      return 0;
   room = (ULong)size - off;
   return (UWord)(len < room ? len : room);
}

/* Print the contents of an XCOFF loader section: its header, the
   import-file string table, the symbol-name ("other") string table,
   the loader symbol table and the loader relocation table.  All
   output goes through VG_(printf) and is gated by SHOW and the
   SHOW_LD_* switches.

   di        passed along for the benefit of the SHOW tests
   oi_start  address of the loader section; an LDHDR is read from here
             and l_impoff / l_stoff are applied relative to it
   size      NOTE(review): taken to be the number of readable bytes
             at oi_start (the loader section's size) -- confirm
             against the caller, which is not visible from here.

   The offsets and counts in the header come straight from the object
   file, so each table is clipped to what actually fits inside
   [oi_start, oi_start+size).  For a well-formed section the output
   is unchanged; for a truncated or corrupt one the dump stops at the
   section boundary instead of reading past it. */
static void show_loader_section ( struct _DebugInfo* di,
                                  UChar* oi_start, UWord size )
{
   Int    i, j;
   LDHDR* hdr;
   UChar* strtab_import = NULL;
   UChar* strtab_other  = NULL;
   UWord  n_import      = 0;     /* usable bytes of import string table */
   UWord  n_other       = 0;     /* usable bytes of other string table  */
   UWord  n_syms        = 0;     /* symbol entries that fit             */
   UWord  n_relocs      = 0;     /* relocation entries that fit         */
   UWord  room;
   Bool   syms_intact   = True;  /* False if the symbol table was cut   */
   Bool   clipped       = False; /* True if anything at all was cut     */

   /* Without a complete header there is nothing sensible to show. */
   if (oi_start == NULL || size < sizeof(LDHDR))
      return;
   hdr = (LDHDR*)oi_start;

   if (SHOW) {
      VG_(printf)(" l_version %llu\n", (ULong)hdr->l_version);
      VG_(printf)(" l_nsyms %lld\n", (Long)hdr->l_nsyms);
      VG_(printf)(" l_nreloc %lld\n", (Long)hdr->l_nreloc);
      VG_(printf)(" l_istlen (i st len) %lld\n", (Long)hdr->l_istlen);
      VG_(printf)(" l_impoff (i st off) %llu\n", (ULong)hdr->l_impoff);
      VG_(printf)(" l_nimpid (# imps) %llu\n", (ULong)hdr->l_nimpid);
      VG_(printf)(" l_stlen (st len) %llu\n", (ULong)hdr->l_stlen);
      VG_(printf)(" l_stoff (st off) %llu\n", (ULong)hdr->l_stoff);
   }

   /* Locate the two string tables, keeping only the part of each that
      lies inside the section. */
   if (hdr->l_istlen > 0) {
      n_import = loader_usable_bytes( (ULong)hdr->l_impoff,
                                      (ULong)hdr->l_istlen, size );
      if (n_import > 0)
         strtab_import = oi_start + hdr->l_impoff;
      if ((ULong)n_import != (ULong)hdr->l_istlen)
         clipped = True;
   }
   if (hdr->l_stlen > 0) {
      n_other = loader_usable_bytes( (ULong)hdr->l_stoff,
                                     (ULong)hdr->l_stlen, size );
      if (n_other > 0)
         strtab_other = oi_start + hdr->l_stoff;
      if ((ULong)n_other != (ULong)hdr->l_stlen)
         clipped = True;
   }

   if (strtab_import) {
      if (SHOW)
         VG_(printf)(" Loader Import String Table: %llu bytes\n",
                     (ULong)hdr->l_istlen);
      /* A run of NUL-terminated strings; every third one is flagged
         with "::" in the listing. */
      i = 0;
      j = 0;
      while (1) {
         if ((UWord)i >= n_import)
            break;
         if (SHOW && SHOW_LD_STRTAB)
            VG_(printf)(" %3d%s ", i, (j%3)==0 ? "::" : " ");
         j++;
         while ((UWord)i < n_import && strtab_import[i]) {
            if (SHOW && SHOW_LD_STRTAB)
               VG_(printf)("%c", sanitiseChar(strtab_import[i]));
            i++;
         }
         i++; /* step over the terminating NUL */
         if (SHOW && SHOW_LD_STRTAB)
            VG_(printf)("\n");
      }
   }

   if (strtab_other) {
      if (SHOW)
         VG_(printf)(" Loader Other String Table: %llu bytes\n",
                     (ULong)hdr->l_stlen);
      /* Each entry is a 2-byte big-endian length followed by the
         string bytes. */
      i = 0;
      while (1) {
         int len = 0;
         if ((UWord)(i+1) >= n_other)
            break;
         len = (unsigned char)strtab_other[i];
         len <<= 8;
         len |= (unsigned char)strtab_other[i+1];
         i += 2;
         if ((UWord)i >= n_other)
            break;
         if (SHOW && SHOW_LD_STRTAB)
            VG_(printf)(" %2d len %2d ", i, len);
         while (len >= 0 && (UWord)i < n_other && strtab_other[i]) {
            if (SHOW && SHOW_LD_STRTAB)
               VG_(printf)("%c", sanitiseChar(strtab_other[i]));
            i++;
            len--;
         }
         i++; /* step over the terminating NUL */
         if (SHOW && SHOW_LD_STRTAB)
            VG_(printf)("\n");
      }
   }

   /* The symbol table is taken to start directly after the header. */
   room = size - sizeof(LDHDR);
   if (hdr->l_nsyms > 0) {
      n_syms = (UWord)hdr->l_nsyms;
      if (n_syms > room / sizeof(LDSYM)) {
         n_syms      = room / sizeof(LDSYM);
         syms_intact = False;
         clipped     = True;
      }
   }

   if (SHOW)
      VG_(printf)(" Loader Symbol Table: %lld entries\n", (Long)hdr->l_nsyms);
   LDSYM* sym = (LDSYM*)(oi_start + sizeof(LDHDR));
   for (i = 0; (UWord)i < n_syms; i++) {
      Name name = maybeDerefStrTab( (SYMENT*)&sym[i],
                                    strtab_other, n_other );
      if (SHOW && SHOW_LD_SYMTAB) {
         VG_(printf)(" %2d: %016llx sec %d ty 0x%02x "
                     "scla 0x%02x itab %d ",
                     i, (ULong)sym[i].l_value, (Int)sym[i].l_scnum,
                     (Int)sym[i].l_smtype, (Int)sym[i].l_smclas,
                     (Int)sym[i].l_ifile);
         print_Name(name);
         VG_(printf)("\n");
      }
   }

#  if defined(VGP_ppc32_aix5)
   vg_assert(sizeof(LDREL) == 12);
#  elif defined(VGP_ppc64_aix5)
   vg_assert(sizeof(LDREL) == 16);
#  else
#  error Unknown platform
#  endif

   /* The relocation table follows the last symbol entry.  If the
      symbol table had to be cut short, the start of the relocation
      table is itself outside the section, so show none of it. */
   LDREL* rel = (LDREL*)(&sym[n_syms]);
   if (hdr->l_nreloc > 0) {
      if (syms_intact) {
         room    -= n_syms * sizeof(LDSYM);
         n_relocs = (UWord)hdr->l_nreloc;
         if (n_relocs > room / sizeof(LDREL)) {
            n_relocs = room / sizeof(LDREL);
            clipped  = True;
         }
      } else {
         clipped = True;
      }
   }

   if (SHOW)
      VG_(printf)(" Loader Relocation Table: %lld entries\n",
                  (Long)hdr->l_nreloc);
   for (i = 0; (UWord)i < n_relocs; i++) {
      if (SHOW && SHOW_LD_RELTAB)
         VG_(printf)(" %3d: va %016llx sym %2lld rty 0x%4x sec %2d\n",
                     i, (ULong)rel[i].l_vaddr, (Long)rel[i].l_symndx,
                     (Int)rel[i].l_rtype, (Int)rel[i].l_rsecnm);
   }

   if (SHOW && clipped)
      VG_(printf)(" (loader section tables exceed the section size;"
                  " dump truncated)\n");

   if (SHOW)
      VG_(printf)("\n");
}
+ + +/* Returns True on success, False on any kind of error. + + The object file from which to read symbols is mapped temporarily at + [oimage .. oimage + n_oimage). + + The VMA of where the relevant text section really got loaded (the + "actual VMA", _avma) is [si->text_avma .. si->text_avma + + si->text_size). + + The VMA of the associated data section really got loaded + (the "actual VMA", _avma) is [data_avma .. data_avma + data_alen). + + We will need to peer at the loaded data section in order to make + sense of TOC entries, hence we need to be assured it is mapped and + readable. m_aspacemgr should have given us that assurance, in the + sense that data_avma/data_alen will be save to read in by the time + we get here. +*/ +static +Bool read_xcoff_mapped_object ( struct _DebugInfo* di, + UChar* oimage, UWord n_oimage ) +{ +#define BAD(_msg) do { ML_(symerr)(di, True/*serious*/,_msg); \ + return False; } while (0) + + Int i, j; + + /* The first byte after the oimage - we can't go here */ + UChar* oimage_after = oimage + n_oimage; + + UChar* cursor = oimage; + + /* ------------ File Header ------------ */ +# if defined(VGP_ppc32_aix5) + if (sizeof(FILHDR) != 20) + BAD("readxcoff.c: invalid FILHDR size (32-bit)"); +# elif defined(VGP_ppc64_aix5) + if (sizeof(FILHDR) != 24) + BAD("readxcoff.c: invalid FILHDR size (64-bit)"); +# else +# error "Invalid platform" +# endif + + if (n_oimage < sizeof(FILHDR)) + BAD("readxcoff.c: XCOFF object file header is implausibly small (2)"); + + FILHDR* t_filehdr = (FILHDR*)cursor; + cursor += sizeof(FILHDR); + + if (SHOW) { + VG_(printf)("\nFile Header:\n"); + VG_(printf)(" magic 0x%04x (%s)\n", + (UInt)t_filehdr->f_magic, + name_of_filhdr_f_magic(t_filehdr->f_magic)); + } + +# if defined(VGP_ppc32_aix5) + if (t_filehdr->f_magic != 0x01DF /* XCOFF32 */) + 
BAD("readxcoff.c: XCOFF32 object file header has invalid magic"); +# elif defined(VGP_ppc64_aix5) + if (t_filehdr->f_magic != 0x01F7 /* XCOFF64 */) + BAD("readxcoff.c: XCOFF64 object file header has invalid magic"); +# else +# error "Invalid platform" +# endif + + if (SHOW) { + VG_(printf)(" # of sections %u\n", (UInt)t_filehdr->f_nscns); + VG_(printf)(" time/date 0x%08llx\n", (ULong)t_filehdr->f_timdat); + VG_(printf)(" symtab foffset %llu\n", (ULong)t_filehdr->f_symptr); + VG_(printf)(" # symtab entries %llu\n", (ULong)t_filehdr->f_nsyms); + VG_(printf)(" size of aux hdr %llu\n", (ULong)t_filehdr->f_opthdr); + VG_(printf)(" flags 0x%04x\n", (UInt)t_filehdr->f_flags); + if (t_filehdr->f_flags) { + VG_(printf)(" "); + if (t_filehdr->f_flags & F_RELFLG) VG_(printf)("NoRelocInfo "); + if (t_filehdr->f_flags & F_EXEC) VG_(printf)("IsExec "); + if (t_filehdr->f_flags & F_LNNO) VG_(printf)("NoLineInfo "); + if (t_filehdr->f_flags & F_LSYMS) VG_(printf)("LSYMS "); + if (t_filehdr->f_flags & F_FDPR_PROF) VG_(printf)("FDPR_PROF "); + if (t_filehdr->f_flags & F_FDPR_OPTI) VG_(printf)("FDPR_OPTI "); + if (t_filehdr->f_flags & F_DSA) VG_(printf)("LargeProc "); +# if defined(F_DEP_1) + if (t_filehdr->f_flags & F_DEP_1) VG_(printf)("DEP_1 "); +# endif +# if defined(F_VARPG) + if (t_filehdr->f_flags & F_VARPG) VG_(printf)("VARPG "); +# endif + if (t_filehdr->f_flags & F_LPTEXT) VG_(printf)("LPTEXT "); + if (t_filehdr->f_flags & F_LPDATA) VG_(printf)("LPDATA "); + if (t_filehdr->f_flags & F_DYNLOAD) VG_(printf)("Dynamic "); + if (t_filehdr->f_flags & F_SHROBJ) VG_(printf)("SharedObj "); + if (t_filehdr->f_flags & F_LOADONLY) VG_(printf)("LOADONLY "); +# if defined(F_DEP_2) + if (t_filehdr->f_flags & F_DEP_2) VG_(printf)("DEP_2 "); +# endif + VG_(printf)("\n"); + } + } + + /* ------------ Auxiliary Header ------------ */ +# if defined(VGP_ppc32_aix5) + if (sizeof(AOUTHDR) != 72) + BAD("readxcoff.c: invalid AOUTHDR size (32-bit)"); +# elif defined(VGP_ppc64_aix5) + if 
(sizeof(AOUTHDR) != 120) + BAD("readxcoff.c: invalid AOUTHDR size (64-bit)"); +# else +# error "Invalid platform" +# endif + + Int sntext_1based_if_known = -1; + Int sndata_1based_if_known = -1; + + Addr data_svma = 0; /* stated VMA of data section, if known */ + Bool data_svma_known = False; + Word data_bias = 0; + UWord data_alen_from_auxhdr = 0; + + Addr text_svma = 0; /* stated VMA of text section, if known */ + Bool text_svma_known = False; + Word text_bias = 0; + + Addr toc_avma = 0; /* actual VMA of toc, if known */ + Addr toc_svma = 0; /* stated VMA of toc, if known */ + Addr toc_svma_known = False; + + AOUTHDR* t_auxhdr = NULL; + if (t_filehdr->f_opthdr > 0) { + t_auxhdr = (AOUTHDR*)cursor; + cursor += sizeof(AOUTHDR); + sntext_1based_if_known = (Int)t_auxhdr->o_sntext; + sndata_1based_if_known = (Int)t_auxhdr->o_sndata; + + if (SHOW) { + VG_(printf)("\nAuxiliary Header\n"); + VG_(printf)(" magic 0x%04x (should be 0x010b)\n", + (UInt)t_auxhdr->magic); + VG_(printf)(" vstamp 0x%04x\n", (UInt)t_auxhdr->vstamp); + VG_(printf)(" tsize %lld\n", (Long)t_auxhdr->tsize); + VG_(printf)(" dsize %lld\n", (Long)t_auxhdr->dsize); + VG_(printf)(" bsize %lld\n", (Long)t_auxhdr->bsize); + VG_(printf)(" entry 0x%llx\n", (ULong)t_auxhdr->entry); + VG_(printf)(" text_start 0x%llx (stated)\n", + (ULong)t_auxhdr->text_start); + VG_(printf)(" data_start 0x%llx (stated)\n", + (ULong)t_auxhdr->data_start); + VG_(printf)(" o_toc 0x%llx\n", (ULong)t_auxhdr->o_toc); + VG_(printf)(" o_snentry %d\n", (Int)t_auxhdr->o_snentry); + VG_(printf)(" o_sntext %d\n", (Int)t_auxhdr->o_sntext); + VG_(printf)(" o_sndata %d\n", (Int)t_auxhdr->o_sndata); + VG_(printf)(" o_sntoc %d\n", (Int)t_auxhdr->o_sntoc); + VG_(printf)(" o_snloader %d\n", (Int)t_auxhdr->o_snloader); + VG_(printf)(" o_snbss %d\n", (Int)t_auxhdr->o_snbss); + VG_(printf)(" o_algntext %d\n", (Int)t_auxhdr->o_algntext); + VG_(printf)(" o_algndata %d\n", (Int)t_auxhdr->o_algndata); + VG_(printf)(" o_modtype \"%c%c\"\n", + 
(UChar)t_auxhdr->o_modtype[0], + (UChar)t_auxhdr->o_modtype[1] ); + VG_(printf)(" o_cpuflag 0x%02x\n", (UInt)t_auxhdr->o_cpuflag); + VG_(printf)(" o_cputype 0x%02x\n", (UInt)t_auxhdr->o_cputype); + VG_(printf)(" o_maxstack %llu\n", (ULong)t_auxhdr->o_maxstack); + VG_(printf)(" o_maxdata %llu\n", (ULong)t_auxhdr->o_maxdata); + VG_(printf)(" o_debugger %u\n", t_auxhdr->o_debugger); + /* printf(" o_textpsize %u\n", (UInt)t_auxhdr->o_textpsize); */ + /* printf(" o_stackpsize %u\n", (UInt)t_auxhdr->o_stackpsize); */ + } + + text_svma = t_auxhdr->text_start; + text_svma_known = True; + + data_svma = t_auxhdr->data_start; + data_svma_known = True; + + /* The auxhdr may claim the data section is longer than + data_alen, so note the auxhdr-claimed size too. */ + data_alen_from_auxhdr = (UWord)t_auxhdr->dsize; + + if (t_auxhdr->o_sntoc == t_auxhdr->o_sndata) { + toc_svma = (Addr)t_auxhdr->o_toc; + toc_svma_known = True; + } + } + + /* ------------ Section Headers ------------ */ +# if defined(VGP_ppc32_aix5) + if (sizeof(SCNHDR) != 40) + BAD("readxcoff.c: invalid SCNHDR size (32-bit)"); +# elif defined(VGP_ppc64_aix5) + if (sizeof(SCNHDR) != 72) + BAD("readxcoff.c: invalid SCNHDR size (64-bit)"); +# else +# error "Invalid platform" +# endif + + SCNHDR* t_scnhdr = (SCNHDR*)cursor; + + if (SHOW) + VG_(printf)("\nSection Headers: %d entries\n", t_filehdr->f_nscns); + + /* Where the stabs strings are in the oimage */ + UChar* oi_debug = NULL; + UWord oi_n_debug = 0; + + /* Where the line number entries for the text section are + in the oimage */ + UChar* oi_lnos = NULL; + UWord oi_nent_lnos = 0; /* number of records */ + + for (i = 0; i < t_filehdr->f_nscns; i++) { + UChar sname_safe[9]; + for (j = 0; j < 8; j++) + sname_safe[j] = t_scnhdr[i].s_name[j]; + sname_safe[8] = 0; + if (SHOW) { + VG_(printf)(" --- #%d ---\n", i); + VG_(printf)(" s_name %s\n", sname_safe); + VG_(printf)(" s_paddr 0x%llx\n", (ULong)t_scnhdr[i].s_paddr); + VG_(printf)(" s_vaddr 0x%llx\n", 
(ULong)t_scnhdr[i].s_vaddr); + VG_(printf)(" s_size %lld\n", (Long)t_scnhdr[i].s_size); + VG_(printf)(" s_scnptr %lld\n", (Long)t_scnhdr[i].s_scnptr); + VG_(printf)(" s_relptr %lld\n", (Long)t_scnhdr[i].s_relptr); + VG_(printf)(" s_lnnoptr %lld\n", (Long)t_scnhdr[i].s_lnnoptr); + VG_(printf)(" s_nreloc %llu\n", (ULong)t_scnhdr[i].s_nreloc); + VG_(printf)(" s_nlnno %llu\n", (ULong)t_scnhdr[i].s_nlnno); + VG_(printf)(" s_flags 0x%llx (%s)\n", + (ULong)t_scnhdr[i].s_flags, + name_of_scnhdr_s_flags(t_scnhdr[i].s_flags)); + } + /* find the stabs strings */ + if (t_scnhdr[i].s_flags == STYP_DEBUG) { + oi_debug = oimage; + oi_debug += (UWord)t_scnhdr[i].s_scnptr; + oi_n_debug = (UWord)t_scnhdr[i].s_size; + } + /* find the line number entries for the text section */ + if (t_scnhdr[i].s_flags == STYP_TEXT && t_scnhdr[i].s_lnnoptr > 0) { + oi_lnos = oimage; + oi_lnos += (UWord)t_scnhdr[i].s_lnnoptr; + oi_nent_lnos = (UWord)t_scnhdr[i].s_nlnno; + /* XCOFF is clearly the result of years of kludgery, and + here's one place it shows. .s_nlnno is a 16-bit field, so + if there are 65535 or more entries, they can't be + represented here. In that case, the real number is stored + in a 32-bit field of a an "overflow section header" - a + dummy section header which has no purpose other than to + hold the correct count. And then this kludge applies to + XCOFF32, not XCOFF64. */ + if (t_scnhdr[i].s_nlnno == 0xFFFF + || t_scnhdr[i].s_nreloc == 0xFFFF) { + /* have to test both fields, according to the docs */ + /* find the relevant overflow header */ + for (j = 0; j < t_filehdr->f_nscns; j++) + if (t_scnhdr[j].s_flags == STYP_OVRFLO + && t_scnhdr[j].s_nlnno == i+1 /* ref to correct scn? */ + && t_scnhdr[j].s_nreloc == i+1 /* also must check this */) + break; + vg_assert(j >= 0 && j <= t_filehdr->f_nscns); + if (j == t_filehdr->f_nscns) + /* Hmm. We're hosed. Give up. */ + BAD("readxcoff.c: can't find a required " + "overflow section header"); + /* finally, we have the real count. 
*/ + oi_nent_lnos = (UWord)t_scnhdr[j].s_vaddr; + } + } + cursor += sizeof(SCNHDR); + } + if (SHOW) { + VG_(printf)("\n debug image (stabs strings) at %p size %ld bytes\n", + oi_debug, oi_n_debug); + VG_(printf)(" line number info at %p with %ld entries\n", + oi_lnos, oi_nent_lnos); + } + + /* ------------ establish Text/data biases ------------ */ + + /* Calculate, into text_bias, the offset that has to be added to + symbol table values (stated VMAs) so as to convert them to correct + addresses in the running image (actual VMAs). I can't find any + documentation for this, so the following is determined empirically. + + There appear to be two classes of loaded object: + + .o files. These have a stated text VMA of zero, and so their + symbols start from zero and work upwards. In that case the + bias is precisely the offset where the text section is + loaded (si->text_avma), that is, the actual text VMA. + + Except -- cryptically -- /usr/include/sys/ldr.h says that the + ld_info.ldinfo_textorg field is "start of loaded program + image (includes the XCOFF headers)". And so to get the + correct text bias it is necessary (determined empirically) to + add on the file offset for the text section. I guess this + means that (1) it is assumed the text section is always the + first in the file, and (2) in this case the stated text VMA + is where the start of the file is mapped, not the start of + the text section. + + Last verified 24 May 06. + + .so files, and executables. These have a non-zero stated text + VMA, for example 0x10000150. They appear to get loaded at some + arbitrary address (actual VMA) which is always a whole number + of pages, eg 0x20002000, and in such a way that the offset is + a whole number of pages. So in this example the offset (bias) + would be 0x20002000 - round_to_page_base(0x10000150). 
+ */ + if (text_svma_known) { +#if 0 + if (text_svma == 0) { + text_bias = di->text_avma; + if (sntext_1based_if_known >= 1 + && sntext_1based_if_known <= t_filehdr->f_nscns) + text_bias += t_scnhdr[sntext_1based_if_known - 1].s_scnptr; + } else { + text_bias = di->text_avma - VG_PGROUNDDN(text_svma); + } +#else + text_bias = di->text_avma - text_svma; + if (sntext_1based_if_known >= 1 + && sntext_1based_if_known <= t_filehdr->f_nscns) + text_bias += t_scnhdr[sntext_1based_if_known - 1].s_scnptr; + +#endif + if (SHOW) + VG_(printf)(" text section: stated vma 0x%lx, " + "actual vma 0x%lx, bias 0x%lx\n", + text_svma, di->text_avma, text_bias); + } else { + text_bias = 0; + if (SHOW) + VG_(printf)(" text section: svma UNKNOWN, bias UNKNOWN\n"); + } + + if (data_svma_known) { + data_bias = di->data_avma - data_svma; + if (SHOW) + VG_(printf)(" data section: stated vma 0x%lx, " + "actual vma 0x%lx, bias 0x%lx\n", + data_svma, di->data_avma, data_bias); + } else { + data_bias = 0; + if (SHOW) + VG_(printf)(" data section: svma UNKNOWN, bias UNKNOWN\n"); + } + + if (toc_svma_known) { + toc_avma = toc_svma + data_bias; + if (SHOW) + VG_(printf)(" toc: stated vma 0x%lx, actual vma 0x%lx\n", + toc_svma, toc_avma); + } else { + if (SHOW) + VG_(printf)(" toc: svma UNKNOWN\n"); + toc_avma = 0; + } + + /* ------------ Section Data ------------ */ + for (i = 0; i < t_filehdr->f_nscns; i++) { + if (SHOW) + VG_(printf)("\nSection Data (sec %d, \"%s\")\n", + i, name_of_scnhdr_s_flags(t_scnhdr[i].s_flags) ); + switch (t_scnhdr[i].s_flags & 0xFFFF) { + case STYP_LOADER: + show_loader_section( di, oimage + t_scnhdr[i].s_scnptr, + t_scnhdr[i].s_size ); + break; + default: + if (SHOW) + VG_(printf)(" Not handled yet\n"); + break; + } + } + + /* ------------ establish String Table ------------ */ + /* This is after the symbol table, if it exists at all. */ + /* This is a bit of a hack. The easy way to find the string table + is assume it immediately follows the symbol table. 
That doesn't + work if there is no symbol table; but on the other hand if there + is no symbol table then there isn't much point in carrying on. + Hence, if there is no symbol table we just give up here and + claim to have successfully loaded zero symbols. */ + if (t_filehdr->f_nsyms == 0) { + if (SHOW) + VG_(printf)("Object contains no symbols. Stopping here.\n"); + return True; + } + + cursor = oimage; + cursor += t_filehdr->f_symptr; /* symtab start */ + cursor += SYMESZ * t_filehdr->f_nsyms; /* strtab start */ + /* Does this fall inside the file image? The first 4 bytes is the + string table size, so we need to be able to see at least + them. */ + UChar* oi_strtab = NULL; + UWord oi_n_strtab = 0; + if (cursor + 4 <= oimage_after) { + oi_strtab = cursor; + oi_n_strtab = (UWord)( *(UInt*)oi_strtab ); + if (0) { + VG_(printf)("oimage %p\n", oimage); + VG_(printf)("oimage_after %p\n", oimage_after); + VG_(printf)("cursor %p\n", cursor); + } + if (oi_strtab + oi_n_strtab > oimage_after) + BAD("readxcoff.c: string table exceeds image end"); + } + + /* ------------ Symbol Table ------------ */ + if (SHOW) + VG_(printf)("\nSymbol Table: %llu entries\n", (ULong)t_filehdr->f_nsyms); + cursor = oimage; + cursor += t_filehdr->f_symptr; + HChar* badness = read_symbol_table( + di, + cursor, t_filehdr->f_nsyms, + oi_strtab, oi_n_strtab, + oi_debug, oi_n_debug, + oi_lnos, oi_nent_lnos, + sntext_1based_if_known, sndata_1based_if_known, + data_alen_from_auxhdr, + toc_avma, + text_bias, data_bias + ); + if (badness) + BAD(badness); + /* cursor not used after this point */ + + /* ------------ String Table ------------ */ + if (oi_strtab) { + if (SHOW) + VG_(printf)("\nString Table: %lu bytes\n", oi_n_strtab); + i = 4; + while (1) { + if (i >= oi_n_strtab) + break; + if (SHOW && SHOW_STRTAB) + VG_(printf)(" %5d ", i); + while (i < oi_n_strtab && oi_strtab[i]) { + if (SHOW && SHOW_STRTAB) + VG_(printf)("%c", sanitiseChar(oi_strtab[i])); + i++; + } + i++; + if (SHOW && SHOW_STRTAB) + 
VG_(printf)("\n"); + } + } + + if (SHOW) + VG_(printf)("\n"); + return True; + +#undef BAD +} + + +static ULong ascii_to_ULong ( void* vbuf, Int nbuf ) +{ + Int i; + UChar c; + UChar* buf = (UChar*)vbuf; + ULong n = 0; + for (i = 0; i < nbuf; i++) { + c = buf[i]; + if (c >= '0' && c <= '9') + n = 10ULL * n + (ULong)(c - '0'); + } + return n; +} + + +/* Returns True on success, False if any kind of problem. */ +static +Bool read_xcoff_o_or_a ( /*MOD*/struct _DebugInfo* di, + HChar* a_name, HChar* o_name ) +{ + UChar* image = NULL; + Word n_image = 0; + Bool ok; + Int i; + SysRes sr, fd; + + struct vg_stat stat_buf; + + vg_assert(o_name); + + if (a_name == NULL) { + /* This is just a plain XCOFF object file. */ + + sr = VG_(stat)( o_name, &stat_buf ); + if (sr.isError) { + ML_(symerr)(di, True, "can't stat XCOFF object file"); + return False; + } + + n_image = stat_buf.st_size; + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("XCOFF object file size %ld\n", n_image); + if (n_image <= 0) { + ML_(symerr)(di, True, "implausible XCOFF object file size"); + return False; + } + + fd = VG_(open)( o_name, VKI_O_RDONLY, 0 ); + if (fd.isError) { + ML_(symerr)(di, True, "can't open XCOFF object file"); + return False; + } + + sr = VG_(am_mmap_file_float_valgrind)(n_image, VKI_PROT_READ, + fd.res, 0); + VG_(close)(fd.res); + + if (sr.isError) { + ML_(symerr)(di, True, "can't mmap XCOFF object file"); + return False; + } + + image = (UChar*)sr.res; + ok = read_xcoff_mapped_object( di, image, n_image ); + VG_(am_munmap_valgrind)( (Addr)image, n_image); + + /* assert OK */ + return ok; + + } else { + + /* It's an XCOFF .a file ("ar file format, large"). Map the + whole thing in, find the member specified by O_NAME, and read + symbols from that. 
*/ + + sr = VG_(stat)( a_name, &stat_buf ); + if (sr.isError) { + ML_(symerr)(di, True, "can't stat XCOFF archive file"); + return False; + } + + n_image = stat_buf.st_size; + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("XCOFF archive file size %ld\n", n_image); + if (n_image <= 0) { + ML_(symerr)(di, True, "implausible XCOFF archive file size"); + return False; + } + + fd = VG_(open)( a_name, VKI_O_RDONLY, 0 ); + if (fd.isError) { + ML_(symerr)(di, True, "can't open XCOFF archive file"); + return False; + } + + sr = VG_(am_mmap_file_float_valgrind)(n_image, VKI_PROT_READ, + fd.res, 0); + VG_(close)(fd.res); + + if (sr.isError) { + ML_(symerr)(di, True, "can't mmap XCOFF archive file"); + return False; + } + + image = (UChar*)sr.res; + ok = False; + + /* Right. Let's go looking for the requested object. First, + peer at the archive's fixed header. */ + + if (n_image < sizeof(FL_HDR)) { + ML_(symerr)(di, True, "XCOFF archive too small for fixed header"); + goto done; + } + + FL_HDR* fl_hdr = (FL_HDR*)image; + if (SHOW && SHOW_AR_DETAILS) { + VG_(printf)("magic: %s\n", fl_hdr->fl_magic); + VG_(printf)("memoff: %s\n", fl_hdr->fl_memoff); + VG_(printf)("gstoff: %s\n", fl_hdr->fl_gstoff); + VG_(printf)("gst64off: %s\n", fl_hdr->fl_gst64off); + } + + { UChar* s = (UChar*)&fl_hdr->fl_magic; + if (s[0] == '<' && s[1] == 'b' && s[2] == 'i' + && s[3] == 'g' && s[4] == 'a' && s[5] == 'f' + && s[6] == '>' && s[7] == '\n') { + /* ok */ + } else { + ML_(symerr)(di, True, + "Is not XCOFF 'big'-variant .a format archive"); + goto done; + } + } + + /* Get a pointer to the member table entry. 
*/ + UChar* mtabC = image + ascii_to_ULong(&fl_hdr->fl_memoff, + sizeof(fl_hdr->fl_memoff)); + AR_HDR* mt_hdr = (AR_HDR*)mtabC; + + if (mtabC < image || mtabC + sizeof(AR_HDR) > image + n_image) { + ML_(symerr)(di, True, + "XCOFF archive member table header exceeds image"); + goto done; + } + + /* should be: backquote newline */ + if (mt_hdr->_ar_name.ar_name[0] != 0x60 /* backquote */ + || mt_hdr->_ar_name.ar_name[1] != 0x0A /* \n */) { + ML_(symerr)(di, True, + "XCOFF archive member table header is invalid"); + goto done; + } + + if (SHOW) { + VG_(printf)("member table ar_size = %lld\n", + ascii_to_ULong(&mt_hdr->ar_size,20)); + VG_(printf)("member table ar_namlen = %lld\n", + ascii_to_ULong(&mt_hdr->ar_namlen,4)); + } + + if (mtabC < image + || mtabC + sizeof(AR_HDR) + + ascii_to_ULong(&mt_hdr->ar_size, 20) + > image + n_image) { + ML_(symerr)(di, True, "XCOFF archive member table exceeds image"); + goto done; + } + + UChar* data = mtabC + sizeof(AR_HDR) + + ascii_to_ULong(&mt_hdr->ar_namlen,4); + /* ALIGN */ + if ( ((UWord)data) & 1 ) data++; + if (SHOW) + VG_(printf)("member table data = %p\n", data); + + UInt nmembers = ascii_to_ULong(data, 20); + if (SHOW) + VG_(printf)("member table contains %d entries\n", nmembers); + for (i = 0; i < nmembers; i++) { + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)(" %d has off %d\n", + i, (Int)ascii_to_ULong(data + 20 + 20*i, 20)); + } + + UChar* p = data + 20 + 20*nmembers; + + for (i = 0; i < nmembers; i++) { + + if (0 != VG_(strcmp)(p, o_name)) + goto move_on; + + UInt objoff = ascii_to_ULong(data + 20 + 20*i, 20); + + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("got offset = %u\n", objoff); + + vg_assert(ok == False); + + /* Sanity check the selected member */ + UChar* o_hdrC = image + objoff; + if (o_hdrC + sizeof(AR_HDR) >= image + n_image) { + ML_(symerr)(di, True, + "XCOFF archive member header exceeds image"); + goto done; + } + AR_HDR* o_hdr = (AR_HDR*)o_hdrC; + UWord o_size = (UWord)ascii_to_ULong(&o_hdr->ar_size, 
20); + UChar* o_data = o_hdrC + sizeof(AR_HDR) + + (UWord)ascii_to_ULong(&o_hdr->ar_namlen,4); + + /* ALIGN */ + if ( ((UWord)o_data) & 1 ) o_data++; + + if (SHOW) + VG_(printf)("member data = %p, size = %ld\n", o_data, o_size); + + if (!(o_data >= image && o_data + o_size <= image + n_image)) { + ML_(symerr)(di, True, + "XCOFF archive member exceeds image"); + goto done; + } + + if (o_size < sizeof(FILHDR)) { + ML_(symerr)(di, True, + "XCOFF object file header is implausibly small (1)"); + goto done; + } + + /* It's the right name, but need to also check the magic + number, since some archives contain both a 32-bit and + 64-bit version of the same object. */ + FILHDR* t_filhdr = (FILHDR*)o_data; +# if defined(VGP_ppc32_aix5) + if (t_filhdr->f_magic == 0x01F7 /* XCOFF64 */) { + if (0) + VG_(printf)("Skipping 64-bit archive on 32-bit platform\n"); + goto move_on; + } +# elif defined(VGP_ppc64_aix5) + if (t_filhdr->f_magic == 0x01DF /* XCOFF32 */) { + if (0) + VG_(printf)("Skipping 32-bit archive on 64-bit platform\n"); + goto move_on; + } +# endif + + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("\nimage: %p-%p object: %p-%p\n\n", + image, image+n_image-1, o_data, o_data+o_size-1); + ok = read_xcoff_mapped_object( di, o_data, o_size ); + goto done; + + vg_assert(0); + /* NOTREACHED */ + + move_on: + while (*p) { + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("%c", *p); + p++; + } + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("\n"); + p++; + } + + vg_assert(i == nmembers); + ML_(symerr)(di, True, "can't find object in XCOFF archive file"); + + done: + if (image) { + VG_(am_munmap_valgrind)( (Addr)image, n_image ); + /* assert munmap succeeded */ + } + return ok; + + } +} + + +/* Main entry point for XCOFF reading. The following di fields must + be filled in by the caller: + + filename + memname (optional) + text_avma, text_size + data_avma, data_size + + and all other fields should be zeroed. 
+*/ +Bool ML_(read_xcoff_debug_info) ( struct _DebugInfo* di, + Bool is_mainexe ) +{ + Bool ok; + + if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) { + if (di->memname) { + VG_(message)(Vg_DebugMsg, "Reading syms from %s(%s) (%#lx)", + di->filename, di->memname, di->text_avma); + } else { + VG_(message)(Vg_DebugMsg, "Reading syms from %s (%#lx)", + di->filename, di->text_avma); + } + } + + if (SHOW) { + VG_(printf)("------------------- BEGIN read xcoff ------------------\n"); + VG_(printf)("--- file: %s\n", di->filename); + VG_(printf)("--- mem: %s\n", di->memname ? di->memname + : (UChar*)"(none)" ); + VG_(printf)("--- t actual vma: %#lx\n", di->text_avma); + VG_(printf)("--- t actual len: %ld\n", di->text_size); + VG_(printf)("--- d actual vma: %#lx\n", di->data_avma); + VG_(printf)("--- d actual len: %ld\n", di->data_size); + } + + if (di->memname) { + /* XCOFF .a file. di->filename is its name, di->memname is the + name of the required .o within it. */ + ok = read_xcoff_o_or_a( di, di->filename, di->memname ); + } else { + /* no archive member name, so di->filename is an XCOFF object */ + ok = read_xcoff_o_or_a( di, NULL, di->filename ); + } + + di->soname = NULL; + if (ok) { + if (is_mainexe) { + di->soname = "NONE"; + } else { + UChar* p = VG_(strrchr)(di->filename, '/'); + p = p ? 
p+1 : di->filename; + /* p points at the main filename */ + if (di->memname) { + /* set the soname to "archive.a(member.o)" */ + Int nbytes = VG_(strlen)(p) + 1 + VG_(strlen)(di->memname) + 1 + 1; + UChar* so = ML_(dinfo_zalloc)("di.readxcoff.rxdi.1", nbytes); + vg_assert(so); + VG_(sprintf)(so, "%s(%s)", p, di->memname); + vg_assert(VG_(strlen)(so) == nbytes-1); + di->soname = so; + } else { + /* no member name, hence soname = "archive.a" */ + di->soname = ML_(dinfo_strdup)("di.readxcoff.rxdi.2", p); + } + } + if (SHOW) + VG_(printf)("Setting soname to %s\n", di->soname); + } + + if (SHOW) + VG_(printf)("------------------- END read xcoff ------------------\n\n"); + + return ok; +} + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/storage.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/storage.c.svn-base new file mode 100644 index 0000000..343cf3b --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/storage.c.svn-base @@ -0,0 +1,1594 @@ + +/*--------------------------------------------------------------------*/ +/*--- Format-neutral storage of and querying of info acquired from ---*/ +/*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info. ---*/ +/*--- storage.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +/* This file manages the data structures built by the debuginfo + system. These are: the top level SegInfo list. For each SegInfo, + there are tables for for address-to-symbol mappings, + address-to-src-file/line mappings, and address-to-CFI-info + mappings. +*/ + +#include "pub_core_basics.h" +#include "pub_core_options.h" /* VG_(clo_verbosity) */ +#include "pub_core_debuginfo.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcprint.h" +#include "pub_core_xarray.h" +#include "pub_core_oset.h" + +#include "priv_misc.h" /* dinfo_zalloc/free/strdup */ +#include "priv_d3basics.h" /* ML_(pp_GX) */ +#include "priv_tytypes.h" +#include "priv_storage.h" /* self */ + + +/*------------------------------------------------------------*/ +/*--- Misc (printing, errors) ---*/ +/*------------------------------------------------------------*/ + +/* Show a non-fatal debug info reading error. Use vg_panic if + terminal. 'serious' errors are shown regardless of the + verbosity setting. 
*/ +void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg ) +{ + /* XML mode hides everything :-( */ + if (VG_(clo_xml)) + return; + + if (serious) { + + VG_(message)(Vg_DebugMsg, "WARNING: Serious error when " + "reading debug info"); + if (True || VG_(clo_verbosity) < 2) { + /* Need to show what the file name is, at verbosity levels 2 + or below, since that won't already have been shown */ + VG_(message)(Vg_DebugMsg, + "When reading debug info from %s:", + (di && di->filename) ? di->filename : (UChar*)"???"); + } + VG_(message)(Vg_DebugMsg, "%s", msg); + + } else { /* !serious */ + + if (VG_(clo_verbosity) >= 2) + VG_(message)(Vg_DebugMsg, "%s", msg); + + } +} + + +/* Print a symbol. */ +void ML_(ppSym) ( Int idx, DiSym* sym ) +{ + VG_(printf)( "%5d: %#8lx .. %#8lx (%d) %s\n", + idx, + sym->addr, + sym->addr + sym->size - 1, sym->size, + sym->name ); +} + +/* Print a call-frame-info summary. */ +void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si ) +{ +# define SHOW_HOW(_how, _off) \ + do { \ + if (_how == CFIR_UNKNOWN) { \ + VG_(printf)("Unknown"); \ + } else \ + if (_how == CFIR_SAME) { \ + VG_(printf)("Same"); \ + } else \ + if (_how == CFIR_CFAREL) { \ + VG_(printf)("cfa+%d", _off); \ + } else \ + if (_how == CFIR_MEMCFAREL) { \ + VG_(printf)("*(cfa+%d)", _off); \ + } else \ + if (_how == CFIR_EXPR) { \ + VG_(printf)("{"); \ + ML_(ppCfiExpr)(exprs, _off); \ + VG_(printf)("}"); \ + } else { \ + vg_assert(0+0); \ + } \ + } while (0) + + VG_(printf)("[%#lx .. 
%#lx]: ", si->base, + si->base + (UWord)si->len - 1); + switch (si->cfa_how) { + case CFIC_SPREL: + VG_(printf)("let cfa=oldSP+%d", si->cfa_off); + break; + case CFIC_FPREL: + VG_(printf)("let cfa=oldFP+%d", si->cfa_off); + break; + case CFIC_EXPR: + VG_(printf)("let cfa={"); + ML_(ppCfiExpr)(exprs, si->cfa_off); + VG_(printf)("}"); + break; + default: + vg_assert(0); + } + + VG_(printf)(" in RA="); + SHOW_HOW(si->ra_how, si->ra_off); + VG_(printf)(" SP="); + SHOW_HOW(si->sp_how, si->sp_off); + VG_(printf)(" FP="); + SHOW_HOW(si->fp_how, si->fp_off); + VG_(printf)("\n"); +# undef SHOW_HOW +} + + +/*------------------------------------------------------------*/ +/*--- Adding stuff ---*/ +/*------------------------------------------------------------*/ + +/* Add a str to the string table, including terminating zero, and + return pointer to the string in vg_strtab. Unless it's been seen + recently, in which case we find the old pointer and return that. + This avoids the most egregious duplications. + + JSGF: changed from returning an index to a pointer, and changed to + a chunking memory allocator rather than reallocating, so the + pointers are stable. +*/ +UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len ) +{ + struct strchunk *chunk; + Int space_needed; + UChar* p; + + if (len == -1) { + len = VG_(strlen)(str); + } else { + vg_assert(len >= 0); + } + + space_needed = 1 + len; + + // Allocate a new strtab chunk if necessary + if (di->strchunks == NULL || + (di->strchunks->strtab_used + + space_needed) > SEGINFO_STRCHUNKSIZE) { + chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk)); + chunk->strtab_used = 0; + chunk->next = di->strchunks; + di->strchunks = chunk; + } + chunk = di->strchunks; + + p = &chunk->strtab[chunk->strtab_used]; + VG_(memcpy)(p, str, len); + chunk->strtab[chunk->strtab_used+len] = '\0'; + chunk->strtab_used += space_needed; + + return p; +} + + +/* Add a symbol to the symbol table. 
+*/ +void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ) +{ + UInt new_sz, i; + DiSym* new_tab; + + /* Ignore zero-sized syms. */ + if (sym->size == 0) return; + + if (di->symtab_used == di->symtab_size) { + new_sz = 2 * di->symtab_size; + if (new_sz == 0) new_sz = 500; + new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1", + new_sz * sizeof(DiSym) ); + if (di->symtab != NULL) { + for (i = 0; i < di->symtab_used; i++) + new_tab[i] = di->symtab[i]; + ML_(dinfo_free)(di->symtab); + } + di->symtab = new_tab; + di->symtab_size = new_sz; + } + + di->symtab[di->symtab_used] = *sym; + di->symtab_used++; + vg_assert(di->symtab_used <= di->symtab_size); +} + + +/* Add a location to the location table. +*/ +static void addLoc ( struct _DebugInfo* di, DiLoc* loc ) +{ + UInt new_sz, i; + DiLoc* new_tab; + + /* Zero-sized locs should have been ignored earlier */ + vg_assert(loc->size > 0); + + if (di->loctab_used == di->loctab_size) { + new_sz = 2 * di->loctab_size; + if (new_sz == 0) new_sz = 500; + new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1", + new_sz * sizeof(DiLoc) ); + if (di->loctab != NULL) { + for (i = 0; i < di->loctab_used; i++) + new_tab[i] = di->loctab[i]; + ML_(dinfo_free)(di->loctab); + } + di->loctab = new_tab; + di->loctab_size = new_sz; + } + + di->loctab[di->loctab_used] = *loc; + di->loctab_used++; + vg_assert(di->loctab_used <= di->loctab_size); +} + + +/* Top-level place to call to add a source-location mapping entry. +*/ +void ML_(addLineInfo) ( struct _DebugInfo* di, + UChar* filename, + UChar* dirname, /* NULL == directory is unknown */ + Addr this, + Addr next, + Int lineno, + Int entry /* only needed for debug printing */ + ) +{ + static const Bool debug = False; + DiLoc loc; + Int size = next - this; + + /* Ignore zero-sized locs */ + if (this == next) return; + + if (debug) + VG_(printf)( " src %s %s line %d %#lx-%#lx\n", + dirname ? dirname : (UChar*)"(unknown)", + filename, lineno, this, next ); + + /* Maximum sanity checking. 
Some versions of GNU as do a shabby + * job with stabs entries; if anything looks suspicious, revert to + * a size of 1. This should catch the instruction of interest + * (since if using asm-level debug info, one instruction will + * correspond to one line, unlike with C-level debug info where + * multiple instructions can map to the one line), but avoid + * catching any other instructions bogusly. */ + if (this > next) { + if (VG_(clo_verbosity) > 2) { + VG_(message)(Vg_DebugMsg, + "warning: line info addresses out of order " + "at entry %d: 0x%lx 0x%lx", entry, this, next); + } + size = 1; + } + + if (size > MAX_LOC_SIZE) { + if (0) + VG_(message)(Vg_DebugMsg, + "warning: line info address range too large " + "at entry %d: %d", entry, size); + size = 1; + } + + /* Rule out ones which are completely outside the r-x mapped area. + See "Comment_Regarding_Text_Range_Checks" elsewhere in this file + for background and rationale. */ + vg_assert(di->have_rx_map && di->have_rw_map); + if (next-1 < di->rx_map_avma + || this >= di->rx_map_avma + di->rx_map_size ) { + if (0) + VG_(message)(Vg_DebugMsg, + "warning: ignoring line info entry falling " + "outside current DebugInfo: %#lx %#lx %#lx %#lx", + di->text_avma, + di->text_avma + di->text_size, + this, next-1); + return; + } + + vg_assert(lineno >= 0); + if (lineno > MAX_LINENO) { + static Bool complained = False; + if (!complained) { + complained = True; + VG_(message)(Vg_UserMsg, + "warning: ignoring line info entry with " + "huge line number (%d)", lineno); + VG_(message)(Vg_UserMsg, + " Can't handle line numbers " + "greater than %d, sorry", MAX_LINENO); + VG_(message)(Vg_UserMsg, + "(Nb: this message is only shown once)"); + } + return; + } + + loc.addr = this; + loc.size = (UShort)size; + loc.lineno = lineno; + loc.filename = filename; + loc.dirname = dirname; + + if (0) VG_(message)(Vg_DebugMsg, + "addLoc: addr %#lx, size %d, line %d, file %s", + this,size,lineno,filename); + + addLoc ( di, &loc ); +} + + +/* 
Top-level place to call to add a CFI summary record. The supplied + DiCfSI is copied. */ +void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig ) +{ + static const Bool debug = False; + UInt new_sz, i; + DiCfSI* new_tab; + SSizeT delta; + + /* copy the original, so we can mess with it */ + DiCfSI cfsi = *cfsi_orig; + + if (debug) { + VG_(printf)("adding DiCfSI: "); + ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi); + } + + /* sanity */ + vg_assert(cfsi.len > 0); + /* If this fails, the implication is you have a single procedure + with more than 5 million bytes of code. Which is pretty + unlikely. Either that, or the debuginfo reader is somehow + broken. 5 million is of course arbitrary; but it's big enough + to be bigger than the size of any plausible piece of code that + would fall within a single procedure. */ + vg_assert(cfsi.len < 5000000); + + vg_assert(di->have_rx_map && di->have_rw_map); + /* If we have an empty r-x mapping (is that possible?) then the + DiCfSI can't possibly fall inside it. In which case skip. */ + if (di->rx_map_size == 0) + return; + + /* Rule out ones which are completely outside the r-x mapped area. + See "Comment_Regarding_Text_Range_Checks" elsewhere in this file + for background and rationale. */ + if (cfsi.base + cfsi.len - 1 < di->rx_map_avma + || cfsi.base >= di->rx_map_avma + di->rx_map_size) { + static Int complaints = 10; + if (VG_(clo_trace_cfi) || complaints > 0) { + complaints--; + if (VG_(clo_verbosity) > 1) { + VG_(message)( + Vg_DebugMsg, + "warning: DiCfSI %#lx .. %#lx outside segment %#lx .. %#lx", + cfsi.base, + cfsi.base + cfsi.len - 1, + di->text_avma, + di->text_avma + di->text_size - 1 + ); + } + if (VG_(clo_trace_cfi)) + ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi); + } + return; + } + + /* Now we know the range is at least partially inside the r-x + mapped area. That implies that at least one of the ends of the + range falls inside the area. If necessary, clip it so it is + completely within the area. 
If we don't do this, + check_CFSI_related_invariants() in debuginfo.c (invariant #2) + will fail. See + "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in + priv_storage.h for background. */ + if (cfsi.base < di->rx_map_avma) { + /* Lower end is outside the mapped area. Hence upper end must + be inside it. */ + if (0) VG_(printf)("XXX truncate lower\n"); + vg_assert(cfsi.base + cfsi.len - 1 >= di->rx_map_avma); + delta = (SSizeT)(di->rx_map_avma - cfsi.base); + vg_assert(delta > 0); + vg_assert(delta < (SSizeT)cfsi.len); + cfsi.base += delta; + cfsi.len -= delta; + } + else + if (cfsi.base + cfsi.len - 1 > di->rx_map_avma + di->rx_map_size - 1) { + /* Upper end is outside the mapped area. Hence lower end must be + inside it. */ + if (0) VG_(printf)("XXX truncate upper\n"); + vg_assert(cfsi.base <= di->rx_map_avma + di->rx_map_size - 1); + delta = (SSizeT)( (cfsi.base + cfsi.len - 1) + - (di->rx_map_avma + di->rx_map_size - 1) ); + vg_assert(delta > 0); vg_assert(delta < (SSizeT)cfsi.len); + cfsi.len -= delta; + } + + /* Final checks */ + + /* Because: either cfsi was entirely inside the range, in which + case we asserted that len > 0 at the start, OR it fell partially + inside the range, in which case we reduced it by some size + (delta) which is < its original size. */ + vg_assert(cfsi.len > 0); + + /* Similar logic applies for the next two assertions. 
*/ + vg_assert(cfsi.base >= di->rx_map_avma); + vg_assert(cfsi.base + cfsi.len - 1 + <= di->rx_map_avma + di->rx_map_size - 1); + + if (di->cfsi_used == di->cfsi_size) { + new_sz = 2 * di->cfsi_size; + if (new_sz == 0) new_sz = 20; + new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1", + new_sz * sizeof(DiCfSI) ); + if (di->cfsi != NULL) { + for (i = 0; i < di->cfsi_used; i++) + new_tab[i] = di->cfsi[i]; + ML_(dinfo_free)(di->cfsi); + } + di->cfsi = new_tab; + di->cfsi_size = new_sz; + } + + di->cfsi[di->cfsi_used] = cfsi; + di->cfsi_used++; + vg_assert(di->cfsi_used <= di->cfsi_size); +} + + +Int ML_(CfiExpr_Undef)( XArray* dst ) +{ + CfiExpr e; + VG_(memset)( &e, 0, sizeof(e) ); + e.tag = Cex_Undef; + return (Int)VG_(addToXA)( dst, &e ); +} +Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr ) +{ + CfiExpr e; + VG_(memset)( &e, 0, sizeof(e) ); + e.tag = Cex_Deref; + e.Cex.Deref.ixAddr = ixAddr; + return (Int)VG_(addToXA)( dst, &e ); +} +Int ML_(CfiExpr_Const)( XArray* dst, UWord con ) +{ + CfiExpr e; + VG_(memset)( &e, 0, sizeof(e) ); + e.tag = Cex_Const; + e.Cex.Const.con = con; + return (Int)VG_(addToXA)( dst, &e ); +} +Int ML_(CfiExpr_Binop)( XArray* dst, CfiOp op, Int ixL, Int ixR ) +{ + CfiExpr e; + VG_(memset)( &e, 0, sizeof(e) ); + e.tag = Cex_Binop; + e.Cex.Binop.op = op; + e.Cex.Binop.ixL = ixL; + e.Cex.Binop.ixR = ixR; + return (Int)VG_(addToXA)( dst, &e ); +} +Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ) +{ + CfiExpr e; + VG_(memset)( &e, 0, sizeof(e) ); + e.tag = Cex_CfiReg; + e.Cex.CfiReg.reg = reg; + return (Int)VG_(addToXA)( dst, &e ); +} +Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg ) +{ + CfiExpr e; + VG_(memset)( &e, 0, sizeof(e) ); + e.tag = Cex_DwReg; + e.Cex.DwReg.reg = reg; + return (Int)VG_(addToXA)( dst, &e ); +} + +static void ppCfiOp ( CfiOp op ) +{ + switch (op) { + case Cop_Add: VG_(printf)("+"); break; + case Cop_Sub: VG_(printf)("-"); break; + case Cop_And: VG_(printf)("&"); break; + case Cop_Mul: VG_(printf)("*"); break; + 
default: vg_assert(0); + } +} + +static void ppCfiReg ( CfiReg reg ) +{ + switch (reg) { + case Creg_SP: VG_(printf)("SP"); break; + case Creg_FP: VG_(printf)("FP"); break; + case Creg_IP: VG_(printf)("IP"); break; + default: vg_assert(0); + } +} + +void ML_(ppCfiExpr)( XArray* src, Int ix ) +{ + /* VG_(indexXA) checks for invalid src/ix values, so we can + use it indiscriminately. */ + CfiExpr* e = (CfiExpr*) VG_(indexXA)( src, ix ); + switch (e->tag) { + case Cex_Undef: + VG_(printf)("Undef"); + break; + case Cex_Deref: + VG_(printf)("*("); + ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr); + VG_(printf)(")"); + break; + case Cex_Const: + VG_(printf)("0x%lx", e->Cex.Const.con); + break; + case Cex_Binop: + VG_(printf)("("); + ML_(ppCfiExpr)(src, e->Cex.Binop.ixL); + VG_(printf)(")"); + ppCfiOp(e->Cex.Binop.op); + VG_(printf)("("); + ML_(ppCfiExpr)(src, e->Cex.Binop.ixR); + VG_(printf)(")"); + break; + case Cex_CfiReg: + ppCfiReg(e->Cex.CfiReg.reg); + break; + case Cex_DwReg: + VG_(printf)("dwr%d", e->Cex.DwReg.reg); + break; + default: + VG_(core_panic)("ML_(ppCfiExpr)"); + /*NOTREACHED*/ + break; + } +} + + +Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV, + const void* elemV ) { + const Addr* key = (const Addr*)keyV; + const DiAddrRange* elem = (const DiAddrRange*)elemV; + if (0) + VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n", + *key, elem->aMin); + if ((*key) < elem->aMin) return -1; + if ((*key) > elem->aMax) return 1; + return 0; +} + +static +void show_scope ( OSet* /* of DiAddrRange */ scope, HChar* who ) +{ + DiAddrRange* range; + VG_(printf)("Scope \"%s\" = {\n", who); + VG_(OSetGen_ResetIter)( scope ); + while (True) { + range = VG_(OSetGen_Next)( scope ); + if (!range) break; + VG_(printf)(" %#lx .. %#lx: %lu vars\n", range->aMin, range->aMax, + range->vars ? VG_(sizeXA)(range->vars) : 0); + } + VG_(printf)("}\n"); +} + +/* Add the variable 'var' to 'scope' for the address range [aMin,aMax] + (inclusive of aMin and aMax). 
Split existing ranges as required if + aMin or aMax or both don't match existing range boundaries, and add + 'var' to all required ranges. Take great care to preserve the + invariant that the ranges in 'scope' cover the entire address range + exactly once, with no overlaps and no holes. */ +static void add_var_to_arange ( + /*MOD*/OSet* /* of DiAddrRange */ scope, + Addr aMin, + Addr aMax, + DiVariable* var + ) +{ + DiAddrRange *first, *last, *range; + /* These xx variables are for assertion checking only; they don't + contribute anything to the actual work of this function. */ + DiAddrRange *xxRangep, *xxFirst, *xxLast; + UWord xxIters; + + vg_assert(aMin <= aMax); + + if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax); + if (0) show_scope( scope, "add_var_to_arange(1)" ); + + /* See if the lower end of the range (aMin) falls exactly on an + existing range boundary. If not, find the range it does fall + into, and split it (copying the variables in the process), so + that aMin does exactly fall on a range boundary. */ + first = VG_(OSetGen_Lookup)( scope, &aMin ); + /* It must be present, since the presented OSet must cover + the entire address range. */ + vg_assert(first); + vg_assert(first->aMin <= first->aMax); + vg_assert(first->aMin <= aMin && aMin <= first->aMax); + + /* Fast track common case, which is that the range specified for + the variable exactly coincides with one already-existing + range. */ + if (first->aMin == aMin && first->aMax == aMax) { + vg_assert(first->vars); + VG_(addToXA)( first->vars, var ); + return; + } + + /* We have to get into splitting ranges, which is complex + and slow. */ + if (first->aMin < aMin) { + DiAddrRange* nyu; + /* Ok. We'll have to split 'first'. 
*/ + /* truncate the upper end of 'first' */ + Addr tmp = first->aMax; + first->aMax = aMin-1; + vg_assert(first->aMin <= first->aMax); + /* create a new range */ + nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) ); + vg_assert(nyu); + nyu->aMin = aMin; + nyu->aMax = tmp; + vg_assert(nyu->aMin <= nyu->aMax); + /* copy vars into it */ + vg_assert(first->vars); + nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars ); + vg_assert(nyu->vars); + VG_(OSetGen_Insert)( scope, nyu ); + first = nyu; + } + + vg_assert(first->aMin == aMin); + + /* Now do exactly the same for the upper end (aMax): if it doesn't + fall on a boundary, cause it to do so by splitting the range it + does currently fall into. */ + last = VG_(OSetGen_Lookup)( scope, &aMax ); + vg_assert(last->aMin <= last->aMax); + vg_assert(last->aMin <= aMax && aMax <= last->aMax); + + if (aMax < last->aMax) { + DiAddrRange* nyu; + /* We have to split 'last'. */ + /* truncate the lower end of 'last' */ + Addr tmp = last->aMin; + last->aMin = aMax+1; + vg_assert(last->aMin <= last->aMax); + /* create a new range */ + nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) ); + vg_assert(nyu); + nyu->aMin = tmp; + nyu->aMax = aMax; + vg_assert(nyu->aMin <= nyu->aMax); + /* copy vars into it */ + vg_assert(last->vars); + nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars ); + vg_assert(nyu->vars); + VG_(OSetGen_Insert)( scope, nyu ); + last = nyu; + } + + vg_assert(aMax == last->aMax); + + xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin); + xxLast = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax); + vg_assert(xxFirst); + vg_assert(xxLast); + vg_assert(xxFirst->aMin == aMin); + vg_assert(xxLast->aMax == aMax); + if (xxFirst != xxLast) + vg_assert(xxFirst->aMax < xxLast->aMin); + + /* Great. Now we merely need to iterate over the segments from + 'first' to 'last' inclusive, and add 'var' to the variable set + of each of them. 
*/ + if (0) { + static UWord ctr = 0; + ctr++; + VG_(printf)("ctr = %lu\n", ctr); + if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" ); + } + + xxIters = 0; + range = xxRangep = NULL; + VG_(OSetGen_ResetIterAt)( scope, &aMin ); + while (True) { + xxRangep = range; + range = VG_(OSetGen_Next)( scope ); + if (!range) break; + if (range->aMin > aMax) break; + xxIters++; + if (0) VG_(printf)("have range %#lx %#lx\n", + range->aMin, range->aMax); + + /* Sanity checks */ + if (!xxRangep) { + /* This is the first in the range */ + vg_assert(range->aMin == aMin); + } else { + vg_assert(xxRangep->aMax + 1 == range->aMin); + } + + vg_assert(range->vars); + VG_(addToXA)( range->vars, var ); + } + /* Done. We should have seen at least one range. */ + vg_assert(xxIters >= 1); + if (xxIters == 1) vg_assert(xxFirst == xxLast); + if (xxFirst == xxLast) vg_assert(xxIters == 1); + vg_assert(xxRangep); + vg_assert(xxRangep->aMax == aMax); + vg_assert(xxRangep == xxLast); +} + + +/* Top-level place to call to add a variable description (as extracted + from a DWARF3 .debug_info section. */ +void ML_(addVar)( struct _DebugInfo* di, + Int level, + Addr aMin, + Addr aMax, + UChar* name, /* in di's .strchunks */ + UWord typeR, /* a cuOff */ + GExpr* gexpr, + GExpr* fbGX, + UChar* fileName, /* where decl'd - may be NULL. 
+ in di's .strchunks */ + Int lineNo, /* where decl'd - may be zero */ + Bool show ) +{ + OSet* /* of DiAddrRange */ scope; + DiVariable var; + Bool all; + TyEnt* ent; + MaybeULong mul; + HChar* badness; + + tl_assert(di && di->admin_tyents); + + if (0) { + VG_(printf)(" ML_(addVar): level %d %#lx-%#lx %s :: ", + level, aMin, aMax, name ); + ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR ); + VG_(printf)("\n Var="); + ML_(pp_GX)(gexpr); + VG_(printf)("\n"); + if (fbGX) { + VG_(printf)(" FrB="); + ML_(pp_GX)( fbGX ); + VG_(printf)("\n"); + } else { + VG_(printf)(" FrB=none\n"); + } + VG_(printf)("\n"); + } + + vg_assert(level >= 0); + vg_assert(aMin <= aMax); + vg_assert(name); + vg_assert(gexpr); + + ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR); + tl_assert(ent); + vg_assert(ML_(TyEnt__is_type)(ent)); + + /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere) + ---------------------------------------------------------------- + Ignore any variables whose aMin .. aMax (that is, range of text + addresses for which they actually exist) falls outside the text + segment. Is this indicative of a bug in the reader? Maybe. + (LATER): instead of restricting strictly to the .text segment, + be a bit more relaxed, and accept any variable whose text range + falls inside the r-x mapped area. This is useful because .text + is not always the only instruction-carrying segment: others are: + .init .plt __libc_freeres_fn and .fini. This implicitly assumes + that those extra sections have the same bias as .text, but that + seems a reasonable assumption to me. */ + /* This is assured us by top level steering logic in debuginfo.c, + and it is re-checked at the start of + ML_(read_elf_debug_info). */ + vg_assert(di->have_rx_map && di->have_rw_map); + if (level > 0 + && (aMax < di->rx_map_avma + || aMin >= di->rx_map_avma + di->rx_map_size)) { + if (VG_(clo_verbosity) >= 0) { + VG_(message)(Vg_DebugMsg, + "warning: addVar: in range %#lx .. 
%#lx outside " + "segment %#lx .. %#lx (%s)", + aMin, aMax, + di->text_avma, di->text_avma + di->text_size -1, + name + ); + } + return; + } + + /* If the type's size is zero (which can mean unknown size), ignore + it. We will never be able to actually relate a data address to + a data object with zero size, so there's no point in storing + info on it. On 32-bit platforms, also reject types whose size + is 2^32 bytes or large. (It's amazing what junk shows up ..) */ + mul = ML_(sizeOfType)(di->admin_tyents, typeR); + + badness = NULL; + if (mul.b != True) + badness = "unknown size"; + else if (mul.ul == 0) + badness = "zero size "; + else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32)) + badness = "implausibly large"; + + if (badness) { + static Int complaints = 10; + if (VG_(clo_verbosity) >= 2 && complaints > 0) { + VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)", + badness, name ); + complaints--; + } + return; + } + + if (!di->varinfo) { + di->varinfo = VG_(newXA)( ML_(dinfo_zalloc), + "di.storage.addVar.1", + ML_(dinfo_free), + sizeof(OSet*) ); + } + + vg_assert(level < 256); /* arbitrary; stay sane */ + /* Expand the top level array enough to map this level */ + while ( VG_(sizeXA)(di->varinfo) <= level ) { + DiAddrRange* nyu; + scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin), + ML_(cmp_for_DiAddrRange_range), + ML_(dinfo_zalloc), "di.storage.addVar.2", + ML_(dinfo_free) ); + vg_assert(scope); + if (0) VG_(printf)("create: scope = %p, adding at %ld\n", + scope, VG_(sizeXA)(di->varinfo)); + VG_(addToXA)( di->varinfo, &scope ); + /* Add a single range covering the entire address space. At + level 0 we require this doesn't get split. At levels above 0 + we require that any additions to it cause it to get split. + All of these invariants get checked both add_var_to_arange + and after reading is complete, in canonicaliseVarInfo. 
*/ + nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) ); + vg_assert(nyu); + nyu->aMin = (Addr)0; + nyu->aMax = ~(Addr)0; + nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3", + ML_(dinfo_free), + sizeof(DiVariable) ); + vg_assert(nyu->vars); + VG_(OSetGen_Insert)( scope, nyu ); + } + + vg_assert( VG_(sizeXA)(di->varinfo) > level ); + scope = *(OSet**)VG_(indexXA)( di->varinfo, level ); + vg_assert(scope); + + var.name = name; + var.typeR = typeR; + var.gexpr = gexpr; + var.fbGX = fbGX; + var.fileName = fileName; + var.lineNo = lineNo; + + all = aMin == (Addr)0 && aMax == ~(Addr)0; + vg_assert(level == 0 ? all : !all); + + add_var_to_arange( /*MOD*/scope, aMin, aMax, &var ); +} + + +/* This really just checks the constructed data structure, as there is + no canonicalisation to do. */ +static void canonicaliseVarInfo ( struct _DebugInfo* di ) +{ + Word i, nInThisScope; + + if (!di->varinfo) + return; + + for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) { + + DiAddrRange *range, *rangep; + OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i); + if (!scope) continue; + + /* Deal with the global-scope case. */ + if (i == 0) { + Addr zero = 0; + vg_assert(VG_(OSetGen_Size)( scope ) == 1); + range = VG_(OSetGen_Lookup)( scope, &zero ); + vg_assert(range); + vg_assert(range->aMin == (Addr)0); + vg_assert(range->aMax == ~(Addr)0); + continue; + } + + /* All the rest of this is for the local-scope case. */ + /* iterate over all entries in 'scope' */ + nInThisScope = 0; + rangep = NULL; + VG_(OSetGen_ResetIter)(scope); + while (True) { + range = VG_(OSetGen_Next)(scope); + if (!range) { + /* We just saw the last one. There must have been at + least one entry in the range. */ + vg_assert(rangep); + vg_assert(rangep->aMax == ~(Addr)0); + break; + } + + vg_assert(range->aMin <= range->aMax); + vg_assert(range->vars); + + if (!rangep) { + /* This is the first entry in the range. 
*/ + vg_assert(range->aMin == 0); + } else { + vg_assert(rangep->aMax + 1 == range->aMin); + } + + rangep = range; + nInThisScope++; + } /* iterating over ranges in a given scope */ + + /* If there's only one entry in this (local) scope, it must + cover the entire address space (obviously), but it must not + contain any vars. */ + + vg_assert(nInThisScope > 0); + if (nInThisScope == 1) { + Addr zero = 0; + vg_assert(VG_(OSetGen_Size)( scope ) == 1); + range = VG_(OSetGen_Lookup)( scope, &zero ); + vg_assert(range); + vg_assert(range->aMin == (Addr)0); + vg_assert(range->aMax == ~(Addr)0); + vg_assert(range->vars); + vg_assert(VG_(sizeXA)(range->vars) == 0); + } + + } /* iterate over scopes */ +} + + +/*------------------------------------------------------------*/ +/*--- Canonicalisers ---*/ +/*------------------------------------------------------------*/ + +/* Sort the symtab by starting address, and emit warnings if any + symbols have overlapping address ranges. We use that old chestnut, + shellsort. Mash the table around so as to establish the property + that addresses are in order and the ranges to not overlap. This + facilitates using binary search to map addresses to symbols when we + come to query the table. +*/ +static Int compare_DiSym ( void* va, void* vb ) +{ + DiSym* a = (DiSym*)va; + DiSym* b = (DiSym*)vb; + if (a->addr < b->addr) return -1; + if (a->addr > b->addr) return 1; + return 0; +} + + +/* Two symbols have the same address. Which name do we prefer? In order: + + - Prefer "PMPI_<foo>" over "MPI_<foo>". + + - Else, prefer a non-NULL name over a NULL one. + + - Else, prefer a non-whitespace name over an all-whitespace name. + + - Else, prefer the shorter symbol name. If the symbol contains a + version symbol ('@' on Linux, other platforms may differ), which means it + is versioned, then the length up to the version symbol is used for length + comparison purposes (so "foo@GLIBC_2.4.2" is considered shorter than + "foobar"). 
+ + - Else, if two symbols have the same length, prefer a versioned symbol over + a non-versioned symbol. + + - Else, use alphabetical ordering. + + - Otherwise, they must be the same; use the symbol with the lower address. + + Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are + aliases in glibc, we choose the 'bcmp' symbol because it's shorter, + so we can misdescribe memcmp() as bcmp()). This is hard to avoid. + It's mentioned in the FAQ file. + */ +static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b ) +{ + Word cmp; + Word vlena, vlenb; /* length without version */ + const UChar *vpa, *vpb; + + Bool preferA = False; + Bool preferB = False; + + vg_assert(a->addr == b->addr); + + vlena = VG_(strlen)(a->name); + vlenb = VG_(strlen)(b->name); + +#if defined(VGO_linux) || defined(VGO_aix5) +# define VERSION_CHAR '@' +#else +# error Unknown OS +#endif + + vpa = VG_(strchr)(a->name, VERSION_CHAR); + vpb = VG_(strchr)(b->name, VERSION_CHAR); + + if (vpa) + vlena = vpa - a->name; + if (vpb) + vlenb = vpb - b->name; + + /* MPI hack: prefer PMPI_Foo over MPI_Foo */ + if (0==VG_(strncmp)(a->name, "MPI_", 4) + && 0==VG_(strncmp)(b->name, "PMPI_", 5) + && 0==VG_(strcmp)(a->name, 1+b->name)) { + preferB = True; goto out; + } + if (0==VG_(strncmp)(b->name, "MPI_", 4) + && 0==VG_(strncmp)(a->name, "PMPI_", 5) + && 0==VG_(strcmp)(b->name, 1+a->name)) { + preferA = True; goto out; + } + + /* Prefer non-empty name. */ + if (vlena && !vlenb) { + preferA = True; goto out; + } + if (vlenb && !vlena) { + preferB = True; goto out; + } + + /* Prefer non-whitespace name. 
*/ + { + Bool blankA = True; + Bool blankB = True; + Char *s; + s = a->name; + while (*s) { + if (!VG_(isspace)(*s++)) { + blankA = False; + break; + } + } + s = b->name; + while (*s) { + if (!VG_(isspace)(*s++)) { + blankB = False; + break; + } + } + + if (!blankA && blankB) { + preferA = True; goto out; + } + if (!blankB && blankA) { + preferB = True; goto out; + } + } + + /* Select the shortest unversioned name */ + if (vlena < vlenb) { + preferA = True; goto out; + } + if (vlenb < vlena) { + preferB = True; goto out; + } + + /* Equal lengths; select the versioned name */ + if (vpa && !vpb) { + preferA = True; goto out; + } + if (vpb && !vpa) { + preferB = True; goto out; + } + + /* Either both versioned or neither is versioned; select them + alphabetically */ + cmp = VG_(strcmp)(a->name, b->name); + if (cmp < 0) { + preferA = True; goto out; + } + if (cmp > 0) { + preferB = True; goto out; + } + /* If we get here, they are the same name. */ + + /* In this case we could choose either (arbitrarily), but might as + well choose the one with the lowest DiSym* address, so as to try + and make the comparison mechanism more stable (a la sorting + parlance). Also, skip the diagnostic printing in this case. */ + return a <= b ? 
a : b; + + /*NOTREACHED*/ + vg_assert(0); + out: + if (preferA && !preferB) { + TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n", + a->addr, a->name, b->name ); + return a; + } + if (preferB && !preferA) { + TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n", + b->addr, b->name, a->name ); + return b; + } + /*NOTREACHED*/ + vg_assert(0); +} + +static void canonicaliseSymtab ( struct _DebugInfo* di ) +{ + Word i, j, n_merged, n_truncated; + Addr s1, s2, e1, e2; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) + + if (di->symtab_used == 0) + return; + + VG_(ssort)(di->symtab, di->symtab_used, + sizeof(*di->symtab), compare_DiSym); + + cleanup_more: + + /* If two symbols have identical address ranges, we pick one + using prefersym() (see it for details). */ + do { + n_merged = 0; + j = di->symtab_used; + di->symtab_used = 0; + for (i = 0; i < j; i++) { + if (i < j-1 + && di->symtab[i].addr == di->symtab[i+1].addr + && di->symtab[i].size == di->symtab[i+1].size) { + n_merged++; + /* merge the two into one */ + di->symtab[di->symtab_used++] + = *prefersym(di, &di->symtab[i], &di->symtab[i+1]); + i++; + } else { + di->symtab[di->symtab_used++] = di->symtab[i]; + } + } + TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged); + } + while (n_merged > 0); + + /* Detect and "fix" overlapping address ranges. */ + n_truncated = 0; + + for (i = 0; i < ((Word)di->symtab_used) -1; i++) { + + vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr); + + /* Check for common (no overlap) case. */ + if (di->symtab[i].addr + di->symtab[i].size + <= di->symtab[i+1].addr) + continue; + + /* There's an overlap. Truncate one or the other. */ + if (di->trace_symtab) { + VG_(printf)("overlapping address ranges in symbol table\n\t"); + ML_(ppSym)( i, &di->symtab[i] ); + VG_(printf)("\t"); + ML_(ppSym)( i+1, &di->symtab[i+1] ); + VG_(printf)("\n"); + } + + /* Truncate one or the other. 
*/ + s1 = di->symtab[i].addr; + s2 = di->symtab[i+1].addr; + e1 = s1 + di->symtab[i].size - 1; + e2 = s2 + di->symtab[i+1].size - 1; + if (s1 < s2) { + e1 = s2-1; + } else { + vg_assert(s1 == s2); + if (e1 > e2) { + s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); + } else + if (e1 < e2) { + s2 = e1+1; + } else { + /* e1 == e2. Identical addr ranges. We'll eventually wind + up back at cleanup_more, which will take care of it. */ + } + } + di->symtab[i].addr = s1; + di->symtab[i+1].addr = s2; + di->symtab[i].size = e1 - s1 + 1; + di->symtab[i+1].size = e2 - s2 + 1; + vg_assert(s1 <= s2); + vg_assert(di->symtab[i].size > 0); + vg_assert(di->symtab[i+1].size > 0); + /* It may be that the i+1 entry now needs to be moved further + along to maintain the address order requirement. */ + j = i+1; + while (j < ((Word)di->symtab_used)-1 + && di->symtab[j].addr > di->symtab[j+1].addr) { + SWAP(DiSym,di->symtab[j],di->symtab[j+1]); + j++; + } + n_truncated++; + } + + if (n_truncated > 0) goto cleanup_more; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < ((Word)di->symtab_used)-1; i++) { + /* No zero-sized symbols. */ + vg_assert(di->symtab[i].size > 0); + /* In order. */ + vg_assert(di->symtab[i].addr < di->symtab[i+1].addr); + /* No overlaps. */ + vg_assert(di->symtab[i].addr + di->symtab[i].size - 1 + < di->symtab[i+1].addr); + } +# undef SWAP +} + + +/* Sort the location table by starting address. Mash the table around + so as to establish the property that addresses are in order and the + ranges do not overlap. This facilitates using binary search to map + addresses to locations when we come to query the table. 
+*/ +static Int compare_DiLoc ( void* va, void* vb ) +{ + DiLoc* a = (DiLoc*)va; + DiLoc* b = (DiLoc*)vb; + if (a->addr < b->addr) return -1; + if (a->addr > b->addr) return 1; + return 0; +} + +static void canonicaliseLoctab ( struct _DebugInfo* di ) +{ + Word i, j; + +# define SWAP(ty,aa,bb) \ + do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0); + + if (di->loctab_used == 0) + return; + + /* Sort by start address. */ + VG_(ssort)(di->loctab, di->loctab_used, + sizeof(*di->loctab), compare_DiLoc); + + /* If two adjacent entries overlap, truncate the first. */ + for (i = 0; i < ((Word)di->loctab_used)-1; i++) { + vg_assert(di->loctab[i].size < 10000); + if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) { + /* Do this in signed int32 because the actual .size fields + are only 12 bits. */ + Int new_size = di->loctab[i+1].addr - di->loctab[i].addr; + if (new_size < 0) { + di->loctab[i].size = 0; + } else + if (new_size > MAX_LOC_SIZE) { + di->loctab[i].size = MAX_LOC_SIZE; + } else { + di->loctab[i].size = (UShort)new_size; + } + } + } + + /* Zap any zero-sized entries resulting from the truncation + process. */ + j = 0; + for (i = 0; i < (Word)di->loctab_used; i++) { + if (di->loctab[i].size > 0) { + if (j != i) + di->loctab[j] = di->loctab[i]; + j++; + } + } + di->loctab_used = j; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < ((Word)di->loctab_used)-1; i++) { + /* + VG_(printf)("%d (%d) %d 0x%x\n", + i, di->loctab[i+1].confident, + di->loctab[i+1].size, di->loctab[i+1].addr ); + */ + /* No zero-sized symbols. */ + vg_assert(di->loctab[i].size > 0); + /* In order. */ + vg_assert(di->loctab[i].addr < di->loctab[i+1].addr); + /* No overlaps. */ + vg_assert(di->loctab[i].addr + di->loctab[i].size - 1 + < di->loctab[i+1].addr); + } +# undef SWAP +} + + +/* Sort the call-frame-info table by starting address. Mash the table + around so as to establish the property that addresses are in order + and the ranges do not overlap. 
This facilitates using binary + search to map addresses to locations when we come to query the + table. + + Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of + any of the address ranges contained in cfisi[0 .. cfisi_used-1], so + as to facilitate rapidly skipping this SegInfo when looking for an + address which falls outside that range. +*/ +static Int compare_DiCfSI ( void* va, void* vb ) +{ + DiCfSI* a = (DiCfSI*)va; + DiCfSI* b = (DiCfSI*)vb; + if (a->base < b->base) return -1; + if (a->base > b->base) return 1; + return 0; +} + +static void canonicaliseCFI ( struct _DebugInfo* di ) +{ + Word i, j; + const Addr minAvma = 0; + const Addr maxAvma = ~minAvma; + + /* Note: take care in here. di->cfsi can be NULL, in which + case _used and _size fields will be zero. */ + if (di->cfsi == NULL) { + vg_assert(di->cfsi_used == 0); + vg_assert(di->cfsi_size == 0); + } + + /* Set cfsi_minavma and cfsi_maxavma to summarise the entire + address range contained in cfsi[0 .. cfsi_used-1]. */ + di->cfsi_minavma = maxAvma; + di->cfsi_maxavma = minAvma; + for (i = 0; i < (Word)di->cfsi_used; i++) { + Addr here_min = di->cfsi[i].base; + Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1; + if (here_min < di->cfsi_minavma) + di->cfsi_minavma = here_min; + if (here_max > di->cfsi_maxavma) + di->cfsi_maxavma = here_max; + } + + if (di->trace_cfi) + VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n", + di->cfsi_used, + di->cfsi_minavma, di->cfsi_maxavma); + + /* Sort the cfsi array by base address. */ + VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI); + + /* If two adjacent entries overlap, truncate the first. */ + for (i = 0; i < (Word)di->cfsi_used-1; i++) { + if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) { + Word new_len = di->cfsi[i+1].base - di->cfsi[i].base; + /* how could it be otherwise? The entries are sorted by the + .base field. 
*/ + vg_assert(new_len >= 0); + vg_assert(new_len <= di->cfsi[i].len); + di->cfsi[i].len = new_len; + } + } + + /* Zap any zero-sized entries resulting from the truncation + process. */ + j = 0; + for (i = 0; i < (Word)di->cfsi_used; i++) { + if (di->cfsi[i].len > 0) { + if (j != i) + di->cfsi[j] = di->cfsi[i]; + j++; + } + } + /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */ + di->cfsi_used = j; + + /* Ensure relevant postconditions hold. */ + for (i = 0; i < (Word)di->cfsi_used; i++) { + /* No zero-length ranges. */ + vg_assert(di->cfsi[i].len > 0); + /* Makes sense w.r.t. summary address range */ + vg_assert(di->cfsi[i].base >= di->cfsi_minavma); + vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1 + <= di->cfsi_maxavma); + + if (i < di->cfsi_used - 1) { + /* + if (!(di->cfsi[i].base < di->cfsi[i+1].base)) { + VG_(printf)("\nOOO cfsis:\n"); + ML_(ppCfiSI)(&di->cfsi[i]); + ML_(ppCfiSI)(&di->cfsi[i+1]); + } + */ + /* In order. */ + vg_assert(di->cfsi[i].base < di->cfsi[i+1].base); + /* No overlaps. */ + vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1 + < di->cfsi[i+1].base); + } + } + +} + + +/* Canonicalise the tables held by 'di', in preparation for use. Call + this after finishing adding entries to these tables. */ +void ML_(canonicaliseTables) ( struct _DebugInfo* di ) +{ + canonicaliseSymtab ( di ); + canonicaliseLoctab ( di ); + canonicaliseCFI ( di ); + canonicaliseVarInfo ( di ); +} + + +/*------------------------------------------------------------*/ +/*--- Searching the tables ---*/ +/*------------------------------------------------------------*/ + +/* Find a symbol-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr, + Bool match_anywhere_in_sym, + Bool findText ) +{ + Addr a_mid_lo, a_mid_hi; + Word mid, size, + lo = 0, + hi = di->symtab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. 
*/ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = di->symtab[mid].addr; + size = ( match_anywhere_in_sym + ? di->symtab[mid].size + : 1); + a_mid_hi = ((Addr)di->symtab[mid].addr) + size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + /* Found a symbol with the correct address range. But is it + of the right kind (text vs data) ? */ + if ( findText && di->symtab[mid].isText ) return mid; + if ( (!findText) && (!di->symtab[mid].isText) ) return mid; + return -1; + } +} + + +/* Find a location-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Word mid, + lo = 0, + hi = di->loctab_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = di->loctab[mid].addr; + a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1; + + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Find a CFI-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr ) +{ + Addr a_mid_lo, a_mid_hi; + Word mid, size, + lo = 0, + hi = di->cfsi_used-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. 
*/ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = di->cfsi[mid].base; + size = di->cfsi[mid].len; + a_mid_hi = a_mid_lo + size - 1; + vg_assert(a_mid_hi >= a_mid_lo); + if (ptr < a_mid_lo) { hi = mid-1; continue; } + if (ptr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); + return mid; + } +} + + +/* Find a FPO-table index containing the specified pointer, or -1 + if not found. Binary search. */ + +Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr ) +{ + Addr const addr = ptr - di->rx_map_avma; + Addr a_mid_lo, a_mid_hi; + Word mid, size, + lo = 0, + hi = di->fpo_size-1; + while (True) { + /* current unsearched space is from lo to hi, inclusive. */ + if (lo > hi) return -1; /* not found */ + mid = (lo + hi) / 2; + a_mid_lo = di->fpo[mid].ulOffStart; + size = di->fpo[mid].cbProcSize; + a_mid_hi = a_mid_lo + size - 1; + vg_assert(a_mid_hi >= a_mid_lo); + if (addr < a_mid_lo) { hi = mid-1; continue; } + if (addr > a_mid_hi) { lo = mid+1; continue; } + vg_assert(addr >= a_mid_lo && addr <= a_mid_hi); + return mid; + } +} + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/coregrind/m_debuginfo/.svn/text-base/tytypes.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/tytypes.c.svn-base new file mode 100644 index 0000000..4e3c9ee --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/tytypes.c.svn-base @@ -0,0 +1,876 @@ + +/*--------------------------------------------------------------------*/ +/*--- Representation of source level types. tytypes.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. 
+ + Copyright (C) 2008-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#include "pub_core_basics.h" +#include "pub_core_debuginfo.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcbase.h" +#include "pub_core_libcprint.h" +#include "pub_core_xarray.h" /* to keep priv_tytypes.h happy */ + +#include "priv_misc.h" /* dinfo_zalloc/free/strdup */ +#include "priv_d3basics.h" /* ML_(evaluate_Dwarf3_Expr) et al */ +#include "priv_tytypes.h" /* self */ + + +/* Does this TyEnt denote a type, as opposed to some other kind of + thing? */ + +Bool ML_(TyEnt__is_type)( TyEnt* te ) +{ + switch (te->tag) { + case Te_EMPTY: case Te_INDIR: case Te_UNKNOWN: + case Te_Atom: case Te_Field: case Te_Bound: + return False; + case Te_TyBase: case Te_TyPorR: case Te_TyTyDef: + case Te_TyStOrUn: case Te_TyEnum: case Te_TyArray: + case Te_TyFn: case Te_TyQual: case Te_TyVoid: + return True; + default: + vg_assert(0); + } +} + + +/* Print a TyEnt, debug-style. 
*/ + +static void pp_XArray_of_cuOffs ( XArray* xa ) +{ + Word i; + VG_(printf)("{"); + for (i = 0; i < VG_(sizeXA)(xa); i++) { + UWord cuOff = *(UWord*)VG_(indexXA)(xa, i); + VG_(printf)("0x%05lx", cuOff); + if (i+1 < VG_(sizeXA)(xa)) + VG_(printf)(","); + } + VG_(printf)("}"); +} + +void ML_(pp_TyEnt)( TyEnt* te ) +{ + VG_(printf)("0x%05lx ", te->cuOff); + switch (te->tag) { + case Te_EMPTY: + VG_(printf)("EMPTY"); + break; + case Te_INDIR: + VG_(printf)("INDIR(0x%05lx)", te->Te.INDIR.indR); + break; + case Te_UNKNOWN: + VG_(printf)("UNKNOWN"); + break; + case Te_Atom: + VG_(printf)("Te_Atom(%s%lld,\"%s\")", + te->Te.Atom.valueKnown ? "" : "unknown:", + te->Te.Atom.value, te->Te.Atom.name); + break; + case Te_Field: + VG_(printf)("Te_Field(ty=0x%05lx,nLoc=%lu,loc=%p,\"%s\")", + te->Te.Field.typeR, te->Te.Field.nLoc, + te->Te.Field.loc, + te->Te.Field.name ? te->Te.Field.name : (UChar*)""); + break; + case Te_Bound: + VG_(printf)("Te_Bound["); + if (te->Te.Bound.knownL) + VG_(printf)("%lld", te->Te.Bound.boundL); + else + VG_(printf)("??"); + VG_(printf)(","); + if (te->Te.Bound.knownU) + VG_(printf)("%lld", te->Te.Bound.boundU); + else + VG_(printf)("??"); + VG_(printf)("]"); + break; + case Te_TyBase: + VG_(printf)("Te_TyBase(%d,%c,\"%s\")", + te->Te.TyBase.szB, te->Te.TyBase.enc, + te->Te.TyBase.name ? te->Te.TyBase.name + : (UChar*)"(null)" ); + break; + case Te_TyPorR: + VG_(printf)("Te_TyPorR(%d,%c,0x%05lx)", + te->Te.TyPorR.szB, + te->Te.TyPorR.isPtr ? 'P' : 'R', + te->Te.TyPorR.typeR); + break; + case Te_TyTyDef: + VG_(printf)("Te_TyTyDef(0x%05lx,\"%s\")", + te->Te.TyTyDef.typeR, + te->Te.TyTyDef.name ? te->Te.TyTyDef.name + : (UChar*)"" ); + break; + case Te_TyStOrUn: + if (te->Te.TyStOrUn.complete) { + VG_(printf)("Te_TyStOrUn(%ld,%c,%p,\"%s\")", + te->Te.TyStOrUn.szB, + te->Te.TyStOrUn.isStruct ? 'S' : 'U', + te->Te.TyStOrUn.fieldRs, + te->Te.TyStOrUn.name ? 
te->Te.TyStOrUn.name + : (UChar*)"" ); + if (te->Te.TyStOrUn.fieldRs) + pp_XArray_of_cuOffs( te->Te.TyStOrUn.fieldRs ); + } else { + VG_(printf)("Te_TyStOrUn(INCOMPLETE,\"%s\")", + te->Te.TyStOrUn.name); + } + break; + case Te_TyEnum: + VG_(printf)("Te_TyEnum(%d,%p,\"%s\")", + te->Te.TyEnum.szB, te->Te.TyEnum.atomRs, + te->Te.TyEnum.name ? te->Te.TyEnum.name + : (UChar*)"" ); + if (te->Te.TyEnum.atomRs) + pp_XArray_of_cuOffs( te->Te.TyEnum.atomRs ); + break; + case Te_TyArray: + VG_(printf)("Te_TyArray(0x%05lx,%p)", + te->Te.TyArray.typeR, te->Te.TyArray.boundRs); + if (te->Te.TyArray.boundRs) + pp_XArray_of_cuOffs( te->Te.TyArray.boundRs ); + break; + case Te_TyFn: + VG_(printf)("Te_TyFn"); + break; + case Te_TyQual: + VG_(printf)("Te_TyQual(%c,0x%05lx)", te->Te.TyQual.qual, + te->Te.TyQual.typeR); + break; + case Te_TyVoid: + VG_(printf)("Te_TyVoid%s", + te->Te.TyVoid.isFake ? "(fake)" : ""); + break; + default: + vg_assert(0); + } +} + + +/* Print a whole XArray of TyEnts, debug-style */ + +void ML_(pp_TyEnts)( XArray* tyents, HChar* who ) +{ + Word i, n; + VG_(printf)("------ %s ------\n", who); + n = VG_(sizeXA)( tyents ); + for (i = 0; i < n; i++) { + TyEnt* tyent = (TyEnt*)VG_(indexXA)( tyents, i ); + VG_(printf)(" [%5ld] ", i); + ML_(pp_TyEnt)( tyent ); + VG_(printf)("\n"); + } +} + + +/* Print a TyEnt, C style, chasing stuff as necessary. 
*/ + +static void pp_TyBound_C_ishly ( XArray* tyents, UWord cuOff ) +{ + TyEnt* ent = ML_(TyEnts__index_by_cuOff)( tyents, NULL, cuOff ); + if (!ent) { + VG_(printf)("**bounds-have-invalid-cuOff**"); + return; + } + vg_assert(ent->tag == Te_Bound); + if (ent->Te.Bound.knownL && ent->Te.Bound.knownU + && ent->Te.Bound.boundL == 0) { + VG_(printf)("[%lld]", 1 + ent->Te.Bound.boundU); + } + else + if (ent->Te.Bound.knownL && (!ent->Te.Bound.knownU) + && ent->Te.Bound.boundL == 0) { + VG_(printf)("[]"); + } + else + ML_(pp_TyEnt)( ent ); +} + +void ML_(pp_TyEnt_C_ishly)( XArray* /* of TyEnt */ tyents, + UWord cuOff ) +{ + TyEnt* ent = ML_(TyEnts__index_by_cuOff)( tyents, NULL, cuOff ); + if (!ent) { + VG_(printf)("**type-has-invalid-cuOff**"); + return; + } + switch (ent->tag) { + case Te_TyBase: + if (!ent->Te.TyBase.name) goto unhandled; + VG_(printf)("%s", ent->Te.TyBase.name); + break; + case Te_TyPorR: + ML_(pp_TyEnt_C_ishly)(tyents, ent->Te.TyPorR.typeR); + VG_(printf)("%s", ent->Te.TyPorR.isPtr ? "*" : "&"); + break; + case Te_TyEnum: + if (!ent->Te.TyEnum.name) goto unhandled; + VG_(printf)("enum %s", ent->Te.TyEnum.name); + break; + case Te_TyStOrUn: + VG_(printf)("%s %s", + ent->Te.TyStOrUn.isStruct ? "struct" : "union", + ent->Te.TyStOrUn.name ? 
ent->Te.TyStOrUn.name + : (UChar*)"<anonymous>" ); + break; + case Te_TyArray: + ML_(pp_TyEnt_C_ishly)(tyents, ent->Te.TyArray.typeR); + if (ent->Te.TyArray.boundRs) { + Word w; + XArray* xa = ent->Te.TyArray.boundRs; + for (w = 0; w < VG_(sizeXA)(xa); w++) { + pp_TyBound_C_ishly( tyents, *(UWord*)VG_(indexXA)(xa, w) ); + } + } else { + VG_(printf)("%s", "[??]"); + } + break; + case Te_TyTyDef: + if (!ent->Te.TyTyDef.name) goto unhandled; + VG_(printf)("%s", ent->Te.TyTyDef.name); + break; + case Te_TyFn: + VG_(printf)("%s", "<function_type>"); + break; + case Te_TyQual: + switch (ent->Te.TyQual.qual) { + case 'C': VG_(printf)("const "); break; + case 'V': VG_(printf)("volatile "); break; + default: goto unhandled; + } + ML_(pp_TyEnt_C_ishly)(tyents, ent->Te.TyQual.typeR); + break; + case Te_TyVoid: + VG_(printf)("%svoid", + ent->Te.TyVoid.isFake ? "fake" : ""); + break; + case Te_UNKNOWN: + ML_(pp_TyEnt)(ent); + break; + default: + goto unhandled; + } + return; + + unhandled: + VG_(printf)("pp_TyEnt_C_ishly:unhandled: "); + ML_(pp_TyEnt)(ent); + vg_assert(0); +} + + +/* 'ents' is an XArray of TyEnts, sorted by their .cuOff fields. Find + the entry which has .cuOff field as specified. Returns NULL if not + found. Asserts if more than one entry has the specified .cuOff + value. 
*/ + +void ML_(TyEntIndexCache__invalidate) ( TyEntIndexCache* cache ) +{ + Word i; + for (i = 0; i < N_TYENT_INDEX_CACHE; i++) { + cache->ce[i].cuOff0 = 0; /* not actually necessary */ + cache->ce[i].ent0 = NULL; /* "invalid entry" */ + cache->ce[i].cuOff1 = 0; /* not actually necessary */ + cache->ce[i].ent1 = NULL; /* "invalid entry" */ + } +} + +TyEnt* ML_(TyEnts__index_by_cuOff) ( XArray* /* of TyEnt */ ents, + TyEntIndexCache* cache, + UWord cuOff_to_find ) +{ + Bool found; + Word first, last; + TyEnt key, *res; + + /* crude stats, aggregated over all caches */ + static UWord cacheQs = 0 - 1; + static UWord cacheHits = 0; + + if (0 && 0 == (cacheQs & 0xFFFF)) + VG_(printf)("cache: %'lu queries, %'lu misses\n", + cacheQs, cacheQs - cacheHits); + + if (LIKELY(cache != NULL)) { + UWord h = cuOff_to_find % (UWord)N_TYENT_INDEX_CACHE; + cacheQs++; + // dude, like, way 0, dude. + if (cache->ce[h].cuOff0 == cuOff_to_find && cache->ce[h].ent0 != NULL) { + // dude, way 0 is a total hit! + cacheHits++; + return cache->ce[h].ent0; + } + // dude, check out way 1, dude. + if (cache->ce[h].cuOff1 == cuOff_to_find && cache->ce[h].ent1 != NULL) { + // way 1 hit + UWord tc; + TyEnt* te; + cacheHits++; + // dude, way 1 is the new way 0. move with the times, dude. + tc = cache->ce[h].cuOff0; + te = cache->ce[h].ent0; + cache->ce[h].cuOff0 = cache->ce[h].cuOff1; + cache->ce[h].ent0 = cache->ce[h].ent1; + cache->ce[h].cuOff1 = tc; + cache->ce[h].ent1 = te; + return cache->ce[h].ent0; + } + } + + /* We'll have to do it the hard way */ + key.cuOff = cuOff_to_find; + key.tag = Te_EMPTY; + found = VG_(lookupXA)( ents, &key, &first, &last ); + //found = VG_(lookupXA_UNBOXED)( ents, cuOff_to_find, &first, &last, + // offsetof(TyEnt,cuOff) ); + if (!found) + return NULL; + /* If this fails, the array is invalid in the sense that there is + more than one entry with .cuOff == cuOff_to_find. 
*/ + vg_assert(first == last); + res = (TyEnt*)VG_(indexXA)( ents, first ); + + if (LIKELY(cache != NULL) && LIKELY(res != NULL)) { + /* this is a bit stupid, computing this twice. Oh well. + Perhaps some magic gcc transformation will common them up. + re "res != NULL", since .ent of NULL denotes 'invalid entry', + we can't cache the result when res == NULL. */ + UWord h = cuOff_to_find % (UWord)N_TYENT_INDEX_CACHE; + cache->ce[h].cuOff1 = cache->ce[h].cuOff0; + cache->ce[h].ent1 = cache->ce[h].ent0; + cache->ce[h].cuOff0 = cuOff_to_find; + cache->ce[h].ent0 = res; + } + + return res; +} + + +/* Generates a total ordering on TyEnts based only on their .cuOff + fields. */ + +Word ML_(TyEnt__cmp_by_cuOff_only) ( TyEnt* te1, TyEnt* te2 ) +{ + if (te1->cuOff < te2->cuOff) return -1; + if (te1->cuOff > te2->cuOff) return 1; + return 0; +} + + +/* Generates a total ordering on TyEnts based on everything except + their .cuOff fields. */ +static __attribute__((always_inline)) Word UWord__cmp ( UWord a, UWord b ) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} +static __attribute__((always_inline)) Word Long__cmp ( Long a, Long b ) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} +static __attribute__((always_inline)) Word Bool__cmp ( Bool a, Bool b ) { + vg_assert( ((UWord)a) <= 1 ); + vg_assert( ((UWord)b) <= 1 ); + if (a < b) return -1; + if (a > b) return 1; + return 0; +} +static __attribute__((always_inline)) Word UChar__cmp ( UChar a, UChar b ) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} +static __attribute__((always_inline)) Word Int__cmp ( Int a, Int b ) { + if (a < b) return -1; + if (a > b) return 1; + return 0; +} +static Word XArray_of_UWord__cmp ( XArray* a, XArray* b ) { + Word i, r; + Word aN = VG_(sizeXA)( a ); + Word bN = VG_(sizeXA)( b ); + if (aN < bN) return -1; + if (aN > bN) return 1; + for (i = 0; i < aN; i++) { + r = UWord__cmp( *(UWord*)VG_(indexXA)( a, i ), + *(UWord*)VG_(indexXA)( b, i ) ); + if 
(r != 0) return r; + } + return 0; +} +static Word Bytevector__cmp ( UChar* a, UChar* b, Word n ) { + Word i, r; + vg_assert(n >= 0); + for (i = 0; i < n; i++) { + r = UChar__cmp( a[i], b[i] ); + if (r != 0) return r; + } + return 0; +} +static Word Asciiz__cmp ( UChar* a, UChar* b ) { + /* A wrapper around strcmp that handles NULL strings safely. */ + if (a == NULL && b == NULL) return 0; + if (a == NULL && b != NULL) return -1; + if (a != NULL && b == NULL) return 1; + return VG_(strcmp)(a, b); +} + +Word ML_(TyEnt__cmp_by_all_except_cuOff) ( TyEnt* te1, TyEnt* te2 ) +{ + Word r; + if (te1->tag < te2->tag) return -1; + if (te1->tag > te2->tag) return 1; + switch (te1->tag) { + case Te_EMPTY: + return 0; + case Te_INDIR: + r = UWord__cmp(te1->Te.INDIR.indR, te2->Te.INDIR.indR); + return r; + case Te_Atom: + r = Bool__cmp(te1->Te.Atom.valueKnown, te2->Te.Atom.valueKnown); + if (r != 0) return r; + r = Long__cmp(te1->Te.Atom.value, te2->Te.Atom.value); + if (r != 0) return r; + r = Asciiz__cmp(te1->Te.Atom.name, te2->Te.Atom.name); + return r; + case Te_Field: + r = Bool__cmp(te1->Te.Field.isStruct, te2->Te.Field.isStruct); + if (r != 0) return r; + r = UWord__cmp(te1->Te.Field.typeR, te2->Te.Field.typeR); + if (r != 0) return r; + r = Asciiz__cmp(te1->Te.Field.name, te2->Te.Field.name); + if (r != 0) return r; + r = UWord__cmp(te1->Te.Field.nLoc, te2->Te.Field.nLoc); + if (r != 0) return r; + r = Bytevector__cmp(te1->Te.Field.loc, te2->Te.Field.loc, + te1->Te.Field.nLoc); + return r; + case Te_Bound: + r = Bool__cmp(te1->Te.Bound.knownL, te2->Te.Bound.knownL); + if (r != 0) return r; + r = Bool__cmp(te1->Te.Bound.knownU, te2->Te.Bound.knownU); + if (r != 0) return r; + r = Long__cmp(te1->Te.Bound.boundL, te2->Te.Bound.boundL); + if (r != 0) return r; + r = Long__cmp(te1->Te.Bound.boundU, te2->Te.Bound.boundU); + return r; + case Te_TyBase: + r = UChar__cmp(te1->Te.TyBase.enc, te2->Te.TyBase.enc); + if (r != 0) return r; + r = Int__cmp(te1->Te.TyBase.szB, 
te2->Te.TyBase.szB); + if (r != 0) return r; + r = Asciiz__cmp(te1->Te.TyBase.name, te2->Te.TyBase.name); + return r; + case Te_TyPorR: + r = Int__cmp(te1->Te.TyPorR.szB, te2->Te.TyPorR.szB); + if (r != 0) return r; + r = UWord__cmp(te1->Te.TyPorR.typeR, te2->Te.TyPorR.typeR); + if (r != 0) return r; + r = Bool__cmp(te1->Te.TyPorR.isPtr, te2->Te.TyPorR.isPtr); + return r; + case Te_TyTyDef: + r = UWord__cmp(te1->Te.TyTyDef.typeR, te2->Te.TyTyDef.typeR); + if (r != 0) return r; + r = Asciiz__cmp(te1->Te.TyTyDef.name, te2->Te.TyTyDef.name); + return r; + case Te_TyStOrUn: + r = Bool__cmp(te1->Te.TyStOrUn.isStruct, te2->Te.TyStOrUn.isStruct); + if (r != 0) return r; + r = Bool__cmp(te1->Te.TyStOrUn.complete, te2->Te.TyStOrUn.complete); + if (r != 0) return r; + r = UWord__cmp(te1->Te.TyStOrUn.szB, te2->Te.TyStOrUn.szB); + if (r != 0) return r; + r = Asciiz__cmp(te1->Te.TyStOrUn.name, te2->Te.TyStOrUn.name); + if (r != 0) return r; + r = XArray_of_UWord__cmp(te1->Te.TyStOrUn.fieldRs, + te2->Te.TyStOrUn.fieldRs); + return r; + case Te_TyEnum: + r = Int__cmp(te1->Te.TyEnum.szB, te2->Te.TyEnum.szB); + if (r != 0) return r; + r = Asciiz__cmp(te1->Te.TyEnum.name, te2->Te.TyEnum.name); + if (r != 0) return r; + r = XArray_of_UWord__cmp(te1->Te.TyEnum.atomRs, te2->Te.TyEnum.atomRs); + return r; + case Te_TyArray: + r = UWord__cmp(te1->Te.TyArray.typeR, te2->Te.TyArray.typeR); + if (r != 0) return r; + r = XArray_of_UWord__cmp(te1->Te.TyArray.boundRs, + te2->Te.TyArray.boundRs); + return r; + case Te_TyFn: + return 0; + case Te_TyQual: + r = UWord__cmp(te1->Te.TyQual.typeR, te2->Te.TyQual.typeR); + if (r != 0) return r; + r = UChar__cmp(te1->Te.TyQual.qual, te2->Te.TyQual.qual); + return r; + case Te_TyVoid: + r = Bool__cmp(te1->Te.TyVoid.isFake, te2->Te.TyVoid.isFake); + return r; + default: + vg_assert(0); + } +} + + +/* Free up all directly or indirectly heap-allocated stuff attached to + this TyEnt, and set its tag to Te_EMPTY. The .cuOff field is + unchanged. 
*/

void ML_(TyEnt__make_EMPTY) ( TyEnt* te )
{
   /* The one field that has to survive the wipe below. */
   UWord keptCuOff = te->cuOff;

   /* Release whatever this kind of entry owns. */
   switch (te->tag) {
      /* Kinds that own no separately allocated storage. */
      case Te_EMPTY:
      case Te_INDIR:
      case Te_UNKNOWN:
      case Te_Bound:
      case Te_TyPorR:
      case Te_TyFn:
      case Te_TyQual:
      case Te_TyVoid:
         break;
      /* Kinds that own just a name string. */
      case Te_Atom:
         if (te->Te.Atom.name != NULL)
            ML_(dinfo_free)(te->Te.Atom.name);
         break;
      case Te_TyBase:
         if (te->Te.TyBase.name != NULL)
            ML_(dinfo_free)(te->Te.TyBase.name);
         break;
      case Te_TyTyDef:
         if (te->Te.TyTyDef.name != NULL)
            ML_(dinfo_free)(te->Te.TyTyDef.name);
         break;
      /* Kinds that own a name and/or a second object. */
      case Te_Field:
         if (te->Te.Field.name != NULL)
            ML_(dinfo_free)(te->Te.Field.name);
         if (te->Te.Field.loc != NULL)
            ML_(dinfo_free)(te->Te.Field.loc);
         break;
      case Te_TyStOrUn:
         if (te->Te.TyStOrUn.name != NULL)
            ML_(dinfo_free)(te->Te.TyStOrUn.name);
         if (te->Te.TyStOrUn.fieldRs != NULL)
            VG_(deleteXA)(te->Te.TyStOrUn.fieldRs);
         break;
      case Te_TyEnum:
         if (te->Te.TyEnum.name != NULL)
            ML_(dinfo_free)(te->Te.TyEnum.name);
         if (te->Te.TyEnum.atomRs != NULL)
            VG_(deleteXA)(te->Te.TyEnum.atomRs);
         break;
      case Te_TyArray:
         if (te->Te.TyArray.boundRs != NULL)
            VG_(deleteXA)(te->Te.TyArray.boundRs);
         break;
      default:
         vg_assert(0);
   }

   /* Zero the whole entry, then put back the cuOff and mark it
      empty. */
   VG_(memset)(te, 0, sizeof(*te));
   te->cuOff = keptCuOff;
   te->tag   = Te_EMPTY;
}


/* How big is this type?  If .b in the returned struct is False, the
   size is unknown.
*/ + +static MaybeULong mk_MaybeULong_Nothing ( void ) { + MaybeULong mul; + mul.ul = 0; + mul.b = False; + return mul; +} +static MaybeULong mk_MaybeULong_Just ( ULong ul ) { + MaybeULong mul; + mul.ul = ul; + mul.b = True; + return mul; +} +static MaybeULong mul_MaybeULong ( MaybeULong mul1, MaybeULong mul2 ) { + if (!mul1.b) { vg_assert(mul1.ul == 0); return mul1; } + if (!mul2.b) { vg_assert(mul2.ul == 0); return mul2; } + mul1.ul *= mul2.ul; + return mul1; +} + +MaybeULong ML_(sizeOfType)( XArray* /* of TyEnt */ tyents, + UWord cuOff ) +{ + Word i; + MaybeULong eszB; + TyEnt* ent = ML_(TyEnts__index_by_cuOff)(tyents, NULL, cuOff); + TyEnt* ent2; + vg_assert(ent); + vg_assert(ML_(TyEnt__is_type)(ent)); + switch (ent->tag) { + case Te_TyBase: + vg_assert(ent->Te.TyBase.szB > 0); + return mk_MaybeULong_Just( ent->Te.TyBase.szB ); + case Te_TyQual: + return ML_(sizeOfType)( tyents, ent->Te.TyQual.typeR ); + case Te_TyTyDef: + ent2 = ML_(TyEnts__index_by_cuOff)(tyents, NULL, + ent->Te.TyTyDef.typeR); + vg_assert(ent2); + if (ent2->tag == Te_UNKNOWN) + return mk_MaybeULong_Nothing(); /*UNKNOWN*/ + return ML_(sizeOfType)( tyents, ent->Te.TyTyDef.typeR ); + case Te_TyPorR: + vg_assert(ent->Te.TyPorR.szB == 4 || ent->Te.TyPorR.szB == 8); + return mk_MaybeULong_Just( ent->Te.TyPorR.szB ); + case Te_TyStOrUn: + return ent->Te.TyStOrUn.complete + ? 
mk_MaybeULong_Just( ent->Te.TyStOrUn.szB ) + : mk_MaybeULong_Nothing(); + case Te_TyEnum: + return mk_MaybeULong_Just( ent->Te.TyEnum.szB ); + case Te_TyArray: + ent2 = ML_(TyEnts__index_by_cuOff)(tyents, NULL, + ent->Te.TyArray.typeR); + vg_assert(ent2); + if (ent2->tag == Te_UNKNOWN) + return mk_MaybeULong_Nothing(); /*UNKNOWN*/ + eszB = ML_(sizeOfType)( tyents, ent->Te.TyArray.typeR ); + for (i = 0; i < VG_(sizeXA)( ent->Te.TyArray.boundRs ); i++) { + UWord bo_cuOff + = *(UWord*)VG_(indexXA)(ent->Te.TyArray.boundRs, i); + TyEnt* bo + = ML_(TyEnts__index_by_cuOff)( tyents, NULL, bo_cuOff ); + vg_assert(bo); + vg_assert(bo->tag == Te_Bound); + if (!(bo->Te.Bound.knownL && bo->Te.Bound.knownU)) + return mk_MaybeULong_Nothing(); /*UNKNOWN*/ + eszB = mul_MaybeULong( + eszB, + mk_MaybeULong_Just( (ULong)(bo->Te.Bound.boundU + - bo->Te.Bound.boundL + 1) )); + } + return eszB; + default: + VG_(printf)("ML_(sizeOfType): unhandled: "); + ML_(pp_TyEnt)(ent); + VG_(printf)("\n"); + vg_assert(0); + } +} + + +/* Describe where in the type 'offset' falls. Caller must + deallocate the resulting XArray. */ + +static void copy_UWord_into_XA ( XArray* /* of UChar */ xa, + UWord uw ) { + UChar buf[32]; + VG_(memset)(buf, 0, sizeof(buf)); + VG_(sprintf)(buf, "%lu", uw); + VG_(addBytesToXA)( xa, buf, VG_(strlen)(buf)); +} + +XArray* /*UChar*/ ML_(describe_type)( /*OUT*/PtrdiffT* residual_offset, + XArray* /* of TyEnt */ tyents, + UWord ty_cuOff, + PtrdiffT offset ) +{ + TyEnt* ty; + XArray* xa = VG_(newXA)( ML_(dinfo_zalloc), "di.tytypes.dt.1", + ML_(dinfo_free), + sizeof(UChar) ); + vg_assert(xa); + + ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL, ty_cuOff); + + while (True) { + vg_assert(ty); + vg_assert(ML_(TyEnt__is_type)(ty)); + + switch (ty->tag) { + + /* These are all atomic types; there is nothing useful we can + do. 
*/ + case Te_TyEnum: + case Te_TyFn: + case Te_TyVoid: + case Te_TyPorR: + case Te_TyBase: + goto done; + + case Te_TyStOrUn: { + Word i; + GXResult res; + MaybeULong mul; + XArray* fieldRs; + UWord fieldR; + TyEnt* field = NULL; + PtrdiffT offMin = 0, offMax1 = 0; + if (!ty->Te.TyStOrUn.isStruct) goto done; + fieldRs = ty->Te.TyStOrUn.fieldRs; + if ((!fieldRs) || VG_(sizeXA)(fieldRs) == 0) goto done; + for (i = 0; i < VG_(sizeXA)( fieldRs ); i++ ) { + fieldR = *(UWord*)VG_(indexXA)( fieldRs, i ); + field = ML_(TyEnts__index_by_cuOff)(tyents, NULL, fieldR); + vg_assert(field); + vg_assert(field->tag == Te_Field); + vg_assert(field->Te.Field.loc); + vg_assert(field->Te.Field.nLoc > 0); + /* Re data_bias in this call, we should really send in + a legitimate value. But the expression is expected + to be a constant expression, evaluation of which + will not need to use DW_OP_addr and hence we can + avoid the trouble of plumbing the data bias through + to this point (if, indeed, it has any meaning; from + which DebugInfo would we take the data bias? */ + res = ML_(evaluate_Dwarf3_Expr)( + field->Te.Field.loc, field->Te.Field.nLoc, + NULL/*fbGX*/, NULL/*RegSummary*/, + 0/*data_bias*/, + True/*push_initial_zero*/); + if (0) { + VG_(printf)("QQQ "); + ML_(pp_GXResult)(res); + VG_(printf)("\n"); + } + if (res.kind != GXR_Value) + continue; + mul = ML_(sizeOfType)( tyents, field->Te.Field.typeR ); + if (mul.b != True) + goto done; /* size of field is unknown (?!) */ + offMin = res.word; + offMax1 = offMin + (PtrdiffT)mul.ul; + if (offMin == offMax1) + continue; + vg_assert(offMin < offMax1); + if (offset >= offMin && offset < offMax1) + break; + } + /* Did we find a suitable field? */ + vg_assert(i >= 0 && i <= VG_(sizeXA)( fieldRs )); + if (i == VG_(sizeXA)( fieldRs )) + goto done; /* No. Give up. */ + /* Yes. 'field' is it. 
*/ + vg_assert(field); + if (!field->Te.Field.name) goto done; + VG_(addBytesToXA)( xa, ".", 1 ); + VG_(addBytesToXA)( xa, field->Te.Field.name, + VG_(strlen)(field->Te.Field.name) ); + offset -= offMin; + ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL, + field->Te.Field.typeR ); + tl_assert(ty); + if (ty->tag == Te_UNKNOWN) goto done; + /* keep going; look inside the field. */ + break; + } + + case Te_TyArray: { + MaybeULong mul; + UWord size, eszB, ix; + UWord boundR; + TyEnt* elemTy; + TyEnt* bound; + /* Just deal with the simple, common C-case: 1-D array, + zero based, known size. */ + elemTy = ML_(TyEnts__index_by_cuOff)(tyents, NULL, + ty->Te.TyArray.typeR); + vg_assert(elemTy); + if (elemTy->tag == Te_UNKNOWN) goto done; + vg_assert(ML_(TyEnt__is_type)(elemTy)); + if (!ty->Te.TyArray.boundRs) + goto done; + if (VG_(sizeXA)( ty->Te.TyArray.boundRs ) != 1) goto done; + boundR = *(UWord*)VG_(indexXA)( ty->Te.TyArray.boundRs, 0 ); + bound = ML_(TyEnts__index_by_cuOff)(tyents, NULL, boundR); + vg_assert(bound); + vg_assert(bound->tag == Te_Bound); + if (!(bound->Te.Bound.knownL && bound->Te.Bound.knownU + && bound->Te.Bound.boundL == 0 + && bound->Te.Bound.boundU >= bound->Te.Bound.boundL)) + goto done; + size = bound->Te.Bound.boundU - bound->Te.Bound.boundL + 1; + vg_assert(size >= 1); + mul = ML_(sizeOfType)( tyents, ty->Te.TyArray.typeR ); + if (mul.b != True) + goto done; /* size of element type not known */ + eszB = mul.ul; + if (eszB == 0) goto done; + ix = offset / eszB; + VG_(addBytesToXA)( xa, "[", 1 ); + copy_UWord_into_XA( xa, ix ); + VG_(addBytesToXA)( xa, "]", 1 ); + ty = elemTy; + offset -= ix * eszB; + /* keep going; look inside the array element. 
*/ + break; + } + + case Te_TyQual: { + ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL, + ty->Te.TyQual.typeR); + tl_assert(ty); + if (ty->tag == Te_UNKNOWN) goto done; + break; + } + + case Te_TyTyDef: { + ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL, + ty->Te.TyTyDef.typeR); + tl_assert(ty); + if (ty->tag == Te_UNKNOWN) goto done; + break; + } + + default: { + VG_(printf)("ML_(describe_type): unhandled: "); + ML_(pp_TyEnt)(ty); + VG_(printf)("\n"); + vg_assert(0); + } + + } + } + + done: + *residual_offset = offset; + VG_(addBytesToXA)( xa, "\0", 1 ); + return xa; +} + +/*--------------------------------------------------------------------*/ +/*--- end tytypes.c ---*/ +/*--------------------------------------------------------------------*/ |