diff options
Diffstat (limited to 'coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base')
-rw-r--r-- | coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base | 2486 |
1 files changed, 2486 insertions, 0 deletions
diff --git a/coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base b/coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base new file mode 100644 index 0000000..4435d7c --- /dev/null +++ b/coregrind/m_debuginfo/.svn/text-base/readxcoff.c.svn-base @@ -0,0 +1,2486 @@ + +/*--------------------------------------------------------------------*/ +/*--- Read XCOFF debug info. readxcoff.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2006-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +/* This file reads XCOFF symbol tables and debug info. + Known limitations: + + * only one text section per object file is handled + + * C_BINCL/C_EINCL handling is wrong, so functions defined in files + included from other files will end up with the wrong file name + and possibly line numbers. Fixable. + + * The line number reader leans heavily on the fact that the generic + line number canonicaliser in storage.c truncates overlapping + ranges. +*/ + +#include "pub_core_basics.h" +#include "pub_core_vki.h" /* struct vki_stat et al */ +#include "pub_core_libcbase.h" +#include "pub_core_libcassert.h" +#include "pub_core_libcprint.h" +#include "pub_core_libcfile.h" /* stat, open, close */ +#include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */ +#include "pub_core_options.h" /* VG_(clo_trace_symtab) */ +#include "pub_core_xarray.h" +#include "priv_misc.h" +#include "priv_tytypes.h" +#include "pub_tool_debuginfo.h" +#include "priv_d3basics.h" +#include "priv_storage.h" +#include "priv_readxcoff.h" /* self */ + +/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */ +#if defined(VGP_ppc32_aix5) +# define __XCOFF32__ 1 +# undef __XCOFF64__ +#elif defined(VGP_ppc64_aix5) +# define __XCOFF64__ 1 +# undef __XCOFF32__ +#else +# error "This file should only be compiled on AIX" +#endif +#include <xcoff.h> + +#undef __AR_SMALL__ +#define __AR_BIG__ 1 +#include <ar.h> +/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */ + +/* Debug stuff */ +#define SHOW_LD_STRTAB 1 /* loader string tables */ +#define SHOW_LD_SYMTAB 1 /* loader symbol table */ +#define SHOW_LD_RELTAB 1 /* loader reloc table */ +#define SHOW_STRTAB 1 /* main string table */ +#define SHOW_SYMS_P1 1 /* P1: find text sym starts */ +#define SHOW_SYMS_P2 1 /* P2: find text sym ends */ +#define SHOW_SYMS_P3 1 /* P3: src filenames & fn start/end line #s */ +#define SHOW_SYMS_P4 1 /* P4: line numbers */ +#define SHOW_SYMS_P5 1 /* P5: find TOC pointers */ +#define SHOW_SYMS_P6 1 /* P6: finalise symbol info */ + +#define SHOW_AR_DETAILS 0 /* show details of .a file internals */ + +#define SHOW di->trace_symtab + +/* A small stack of filenames is maintained for dealing + with BINCL/EINCL symbol table entries. */ + +#define N_FILENAME_STACK 16 + +/* Phase 5 (find TOC pointers) has two implementations, the official + version, which involves reading the data segment symbols, and the + kludgey version, which basically scans the (actual loaded) data + segment to find structs which look like function descriptors. */ + +#if 1 +# undef OFFICIAL_PHASE5 +#else +# define OFFICIAL_PHASE5 1 +#endif + +/*------------------------------------------------------------*/ +/*--- Read XCOFF format debug info. ---*/ +/*------------------------------------------------------------*/ + + +/* COFF uses a strange way to represent symbol names. A symbol is an + eight-byte field. + + In 32-bit mode: if the first four bytes are zero, then the second + four bytes give the offset into the string table where the string + really is. Otherwise, the whole 8-byte thing is itself the name. + + In 64-bit mode: a four-byte field at offset 8 is always interpreted + as an offset into the string table. + + For a symbol of length 8, in 32-bit mode, there is no obvious way + to zero-terminate it. One solution is to copy the name into + dynamically allocated memory, but that complicates storage + management. + + An alternative solution, used here, is to represent a name as a + (data, length) pair instead of the traditional zero-terminated + string. Such a pair can be constructed for any XCOFF symbol name, + and has the advantages that (1) no dynamic memory is required, and + (2) the name is guaranteed to be accessible as long as the object + image is mapped in. + + What the .vec points at must not be modified; if you want to do + that, copy it elsewhere first. +*/ + +typedef + struct { + UChar* vec; /* the text of the name */ + UInt len; /* length of the text */ + } + Name; + +static Name maybeDerefStrTab( SYMENT* sym, + UChar* oi_strtab, UWord oi_n_strtab) +{ + Name res; + static UChar* bogus + = (UChar*)"**_Error_Dereferencing_COFF_String_Table_**"; + UChar* bytes = (UChar*)sym; + +# if defined(VGP_ppc32_aix5) + if (bytes[0]==0 && bytes[1]==0 && bytes[2]==0 && bytes[3]==0) { + UInt off = *(UInt*)&bytes[4]; + if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) { + res.vec = &oi_strtab[off]; + res.len = VG_(strlen)(res.vec); + return res; + } else + goto bad; + } else { + Int i; + res.vec = bytes; + res.len = 8; + for (i = 0; i < 8; i++) + if (bytes[i] == 0) + res.len--; + return res; + } + +# elif defined(VGP_ppc64_aix5) + ULong off = (ULong)( *(UInt*)&bytes[8] ); + if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) { + res.vec = &oi_strtab[off]; + res.len = VG_(strlen)(res.vec); + return res; + } else + goto bad; + +# else +# error "Unknown platform" +# endif + + bad: + res.vec = bogus; + res.len = VG_(strlen)(bogus); + return res; +} + + +/* Similar scheme for extracting names from C_FILE auxiliary entries, + except that the 32-bit scheme appears to be always used, even for + XCOFF64. */ + +static Name maybeDerefStrTab_fname ( UChar* bytes, + UChar* oi_strtab, UWord oi_n_strtab) +{ + Name res; + static UChar* bogus + = (UChar*)"**_Error_Dereferencing_COFF_String_Table_**"; + + if (bytes[0]==0 && bytes[1]==0 && bytes[2]==0 && bytes[3]==0) { + UInt off = *(UInt*)&bytes[4]; + if (oi_strtab && oi_n_strtab > 0 && off < oi_n_strtab) { + res.vec = &oi_strtab[off]; + res.len = VG_(strlen)(res.vec); + return res; + } else + goto bad; + } else { + Int i; + res.vec = bytes; + res.len = 8; + for (i = 0; i < 8; i++) + if (bytes[i] == 0) + res.len--; + return res; + } + + bad: + res.vec = bogus; + res.len = VG_(strlen)(bogus); + return res; +} + + +static Name mk_const_Name ( HChar* str ) +{ + Name res; + res.vec = str; + res.len = VG_(strlen)(res.vec); + return res; +} + +static Name mk_empty_Name ( void ) +{ + Name res; + res.vec = ""; + res.len = 0; + return res; +} + +static Bool is_empty_Name ( Name name ) +{ + return name.len == 0; +} + +static Bool eq_string_Name ( Name name, UChar* str ) +{ + UInt i; + for (i = 0; i < name.len; i++) { + if (str[i] == 0) + return False; + if (str[i] != name.vec[i]) + return False; + } + if (str[name.len] == 0) + return True; + else + return False; +} + +static Int cmp_Names ( Name n1, Name n2 ) +{ + UInt i = 0; + while (1) { + vg_assert(i >= 0 && i <= n1.len); + vg_assert(i >= 0 && i <= n2.len); + if (i == n1.len && i == n2.len) + return 0; + if (i == n1.len && i < n2.len) + return -1; + if (i < n1.len && i == n2.len) + return 1; + if (n1.vec[i] < n2.vec[i]) + return -1; + if (n1.vec[i] > n2.vec[i]) + return 1; + i++; + } +} + +static void print_Name ( Name name ) +{ + UInt i; + for (i = 0; i < name.len; i++) + VG_(printf)("%c", name.vec[i]); +} + + +static UChar sanitiseChar ( UChar c ) +{ + if (c < 32 || c > 127) + c = '?'; + return c; +} + +static HChar* name_of_filhdr_f_magic ( Int magic ) +{ + switch (magic) { + case 0x01DF: return "xcoff32"; + case 0x01EF: return "xcoff64-upto-aix43"; + case 0x01F7: return "xcoff64-from-aix51"; + default: return "unknown-xcoff-header-magic"; + } +} + +static HChar* name_of_scnhdr_s_flags ( Int flags ) +{ + switch (flags & 0xFFFF) { + case STYP_REG: return "\"regular\""; + case STYP_PAD: return "\"padding\""; + case STYP_TEXT: return "text only"; + case STYP_DATA: return "data only"; + case STYP_BSS: return "bss only"; + case STYP_EXCEPT: return "Exception"; + case STYP_INFO: return "Comment"; + case STYP_LOADER: return "Loader"; + case STYP_DEBUG: return "Debug"; + case STYP_TYPCHK: return "Typecheck"; + case STYP_OVRFLO: return "Overflow"; + default: return "unknown-section-header-name"; + } +} + +static HChar* name_of_syment_n_sclass ( Int sclass ) +{ + static HChar buf[10]; + switch (sclass) { + /* dbx ones (>= 0x80) */ + case C_GSYM: return "gsym"; + case C_LSYM: return "lsym"; + case C_PSYM: return "psym"; + case C_RSYM: return "rsym"; + case C_RPSYM: return "rpsym"; + case C_STSYM: return "stsym"; + case C_DECL: return "decl"; + case C_FUN: return "fun"; + case C_BSTAT: return "bstat"; + case C_ESTAT: return "estat"; + /* non-dbx ones (< 0x80) */ + case C_STAT: return "STAT"; + case C_FILE: return "FILE"; + case C_HIDEXT: return "HIDEXT"; + case C_EXT: return "EXT"; + case C_FCN: return "FCN"; + case C_BINCL: return "BINCL"; + case C_EINCL: return "EINCL"; + case C_BLOCK: return "BLOCK"; + case C_WEAKEXT: return "WEAKEXT"; + default: + VG_(sprintf)(buf, "??%d??", sclass); + return buf; + } +} + +typedef + struct { + Name name; /* symbol's name */ + Addr first; /* first address; always known */ + Addr last; /* last address; may be an overestimate */ + + Name fname; /* source file name, if known */ + Int slnno; /* starting line #, or 0 if unknown */ + Int elnno; /* ending line #, or 0 if unknown */ + + UWord r2value; /* what r2 should be for this fn (tocptr) */ + Bool r2known; /* do we have a r2 value? */ + } + XCoffSym; + +static void init_XCoffSym( XCoffSym* sym ) +{ + sym->name = mk_empty_Name(); + sym->first = 0; + sym->last = 0; + sym->fname = mk_empty_Name(); + sym->slnno = 0; + sym->elnno = 0; + sym->r2known = False; + sym->r2value = False; +} + +/* Compare XCoffSyms by their start address. */ +static Int cmp_XCoffSym_by_start ( void* v1, void* v2 ) +{ + XCoffSym* s1 = (XCoffSym*)v1; + XCoffSym* s2 = (XCoffSym*)v2; + if (s1->first < s2->first) return -1; + if (s1->first > s2->first) return 1; + return 0; +} + +/* Compare XCoffSyms by a slightly weaker ordering, returning zero + (equivalence) for any overlap, and -1 or 1 otherwise. */ +static Int cmp_XCoffSym_by_overlap ( void* v1, void* v2 ) +{ + XCoffSym* s1 = (XCoffSym*)v1; + XCoffSym* s2 = (XCoffSym*)v2; + if (s1->last < s2->first) return -1; + if (s2->last < s1->first) return 1; + return 0; +} + +/* Compare XCoffSyms by their start address, and for equal addresses, + use the name as a secondary sort key. */ +static Int cmp_XCoffSym_by_start_then_name ( void* v1, void* v2 ) +{ + XCoffSym* s1 = (XCoffSym*)v1; + XCoffSym* s2 = (XCoffSym*)v2; + if (s1->first < s2->first) return -1; + if (s1->first > s2->first) return 1; + return cmp_Names(s1->name, s2->name); +} + + +/* csect_idx is an index in the symbol table (start, n_entries) to a + symbol defining a csect. If possible, find the bounds of the csect + and assign them to *first and *last, and return True; else return + False. sntext_1based_if_known is the 1-based number of the text + section. Note: computes stated VMAs, not actual VMAs. */ + +#if defined(VGP_ppc32_aix5) +# define SMTYP_SMTYP(x) ((x) & 0x7) /* symbol type */ +# define CSECT(PP) (((AUXENT*)(PP))->x_csect) +# define CSECT_LEN(PP) (CSECT(PP).x_scnlen) +# define CSECT_ALIGN(PP) (SMTYP_ALIGN(CSECT(PP).x_smtyp)) +# define CSECT_SMTYP(PP) (SMTYP_SMTYP(CSECT(PP).x_smtyp)) +# define CSECT_SCLAS(PP) (CSECT(PP).x_smclas) + +#elif defined(VGP_ppc64_aix5) +# define SMTYP_SMTYP(x) ((x) & 0x7) /* symbol type */ +# define CSECT(PP) (((AUXENT*)(PP))->x_csect) +# define CSECT_LEN(PP) ((((ULong)(CSECT(PP).x_scnlen_hi)) << 32) \ + | ((ULong)(CSECT(PP).x_scnlen_lo))) +# define CSECT_ALIGN(PP) (SMTYP_ALIGN(CSECT(PP).x_smtyp)) +# define CSECT_SMTYP(PP) (SMTYP_SMTYP(CSECT(PP).x_smtyp)) +# define CSECT_SCLAS(PP) (CSECT(PP).x_smclas) + +#else +# error "Unknown platform" + +#endif + + +#define SYM_IX(_tab,_n) ((SYMENT*)(((UChar*)(_tab)) + SYMESZ * (_n))) + +static +Bool get_csect_bounds ( UChar* start, UWord n_entries, + UWord csect_idx, + Int sntext_1based_if_known, + /*OUT*/UChar** first, /*OUT*/UChar** last ) +{ + Bool is_text; + SYMENT* cssym; + AUXENT* csaux; + + vg_assert(SYMESZ == 18); /* both for XCOFF32 and XCOFF64 */ + + if (n_entries < 2) + return False; + if (csect_idx+1 >= n_entries) + return False; + cssym = (SYMENT*)SYM_IX(start, csect_idx); + csaux = (AUXENT*)SYM_IX(start, csect_idx+1); + is_text = sntext_1based_if_known != -1 + && (Int)cssym->n_scnum == sntext_1based_if_known; + + if (!is_text) + return False; + + if (cssym->n_sclass == C_EXT || cssym->n_sclass == C_HIDEXT) { + if (cssym->n_numaux == 1) { + if (CSECT_SMTYP(csaux) == XTY_SD) { + if (0) VG_(printf)("GCB: SD: len is %lld\n", (Long)CSECT_LEN(csaux)); + *first = (UChar*)(cssym->n_value); + *last = *first + CSECT_LEN(csaux)-1; + return True; + } + } else { + /* Possibly complain or take evasive action here. In fact + I've yet to see a case where a csect definition symbol has + n_numaux != 1. */ + } + } + return False; +} + +/* Read symbol and line number info for the given text section. (This + is the central routine for XCOFF reading.) Returns NULL on + success, or the text of an error message otherwise. */ +static +HChar* read_symbol_table ( + /*MOD*/struct _DebugInfo* di, + + /* location of symbol table */ + UChar* oi_symtab, UWord oi_nent_symtab, + + /* location of string table */ + UChar* oi_strtab, UWord oi_n_strtab, + + /* location of debug section (stabs strings, if any) */ + UChar* oi_debug, UWord oi_n_debug, + + /* location of line number info, if any */ + UChar* oi_lnos, UWord oi_nent_lnos, + + /* section indices */ + Int sntext_1based_if_known, + Int sndata_1based_if_known, + + /* where the mapped data section is */ + /* Now in di->data_avma: Addr data_avma, */ + /* Now in di->data_size: UWord data_alen, */ + UWord data_alen_from_auxhdr, + + /* where the mapped toc is (in the data section, + presumably), if known */ + Addr toc_avma, + + /* stated-to-actual VMA offsets */ + Word text_bias, + Word data_bias + ) +{ + SYMENT* sym; + SYMENT* aux; + UInt i, j, nsyms, k, m; + Name name; + Bool is_text, is_data; + XArray* syms = NULL; /* XArray of XCoffSyms */ + + /* If the TOC avma is obviously bogus, get rid of it */ + { + UWord data_maxlen = di->data_size; + if (data_maxlen < data_alen_from_auxhdr) + data_maxlen = data_alen_from_auxhdr; + + //VG_(printf)(" toc_avma %p\n", toc_avma); + //VG_(printf)("data_avma %p\n", data_avma); + //VG_(printf)("dxxx_avma %p\n", data_avma + data_maxlen); + + if (toc_avma != 0 + && (toc_avma < di->data_avma + || toc_avma >= di->data_avma + data_maxlen)) + toc_avma = 0; + //VG_(printf)("2toc_avma %p\n", toc_avma); + } + + /* We can't just treat this as an array of SYMENTs, because C + thinks they have size 20 whereas the spec says they have size 18 + (alignment padding) so doing the obvious thing screws up. Hence + we have to calculate the offset of each entry manually. */ + + if (0) VG_(printf)("size of SYMENT = %ld\n", sizeof(SYMENT)); + + /* ---------------------------------------------------------- + Phase 1: first make a pass through the symbols, looking for + stuff in the text segment. Calculate their actual VMAs, + dump any outside the text segment actual VMA bounds, and + add the rest to 'syms'. + ---------------------------------------------------------- */ + + syms = VG_(newXA)( ML_(dinfo_zalloc), "di.readxcoff.rst.1", + ML_(dinfo_free), sizeof(XCoffSym) ); + + if (SHOW && SHOW_SYMS_P1) { + VG_(printf)("--- BEGIN Phase1 (find text symbol starts) ---\n"); + VG_(printf)("--- note: shown addresses are STATED VMAs ---\n"); + } + + i = 0; + while (1) { + + if (i >= oi_nent_symtab) + break; + + sym = SYM_IX(oi_symtab, i); + is_text = sntext_1based_if_known != -1 + && (Int)sym->n_scnum == sntext_1based_if_known; + is_data = sndata_1based_if_known != -1 + && (Int)sym->n_scnum == sndata_1based_if_known; + + if (SHOW && SHOW_SYMS_P1) + VG_(printf)("Phase1: %5d+%d ", i, (Int)sym->n_numaux); + + name = mk_const_Name("(unknown)"); + if (sym->n_scnum == N_DEBUG && sym->n_sclass == C_FUN) + name = maybeDerefStrTab( sym, oi_debug, oi_n_debug ); + else + if (sym->n_sclass & DBXMASK) + name = mk_const_Name("(dbxstr)"); + else + name = maybeDerefStrTab( sym, oi_strtab, oi_n_strtab); + + if (SHOW && SHOW_SYMS_P1) { + VG_(printf)("%5s(%2d) %6s 0x%016llx ", + is_text ? "text" : is_data ? "data" : "other", + (Int)sym->n_scnum, + name_of_syment_n_sclass(sym->n_sclass), + (ULong)sym->n_value); + print_Name(name); + VG_(printf)("\n"); + } + + i++; + i += sym->n_numaux; + + if (!is_text) + continue; + + /* --- BEGIN regular(ish) symbol --- */ + if ((sym->n_sclass == C_EXT || sym->n_sclass == C_HIDEXT) + && (sym->n_numaux == 1 || sym->n_numaux == 2)) { + /* Dealing with a symbol with a csect entry. By convention + (according to IBM docs) the csect entry is the last + auxiliary for this symbol, if there is more than one + auxiliary present; hence "SYM_IX(oi_symtab, i-1)" below. */ + + aux = SYM_IX(oi_symtab, i-1); + if (0) VG_(printf)("symtype is %d\n", CSECT_SMTYP(aux)); + + if (CSECT_SMTYP(aux) == XTY_SD) { + /* Aux is a csect definition. This is relatively rare, + but at least it is simple: the CSECT_LEN(aux) field + contains it's length, so we just heave that into the + pot for phase 2. */ + XCoffSym cand; + if (0) VG_(printf)("SD: len is %d\n", (Int)CSECT_LEN(aux)); + if (0) VG_(printf)("SD: proposed %#llx\n", (ULong)sym->n_value); + init_XCoffSym(&cand); + cand.first = sym->n_value; + cand.last = cand.first + (UWord)CSECT_LEN(aux) - 1; + + cand.first += text_bias; + cand.last += text_bias; + cand.name = name; + + if (cand.last < di->text_avma + || cand.first >= di->text_avma + di->text_size) + continue; + if (cand.last < cand.first) + continue; + if (is_empty_Name(name)) + continue; + (void)VG_(addToXA)(syms, &cand); + } + + if (CSECT_SMTYP(aux) == XTY_LD) { + /* Aux is a label definition. This is the common case. */ + XCoffSym cand; + Bool ok; + UChar *csect_first, *csect_last; + /* x_scnlen contains the symbol table entry of the + containing csect. Use the symbol's stated vma and csect + end as the initial approximation of this symbol's start + and length. The length will get revised downwards in + Phase 2. */ + init_XCoffSym(&cand); + ok = get_csect_bounds( oi_symtab, oi_nent_symtab, + CSECT_LEN(aux), + sntext_1based_if_known, + &csect_first, &csect_last ); + if (0 && ok) + VG_(printf)("new csect svma %p %p\n", csect_first, csect_last); + if (ok && ((UWord)csect_first) <= ((UWord)sym->n_value) + && ((UWord)sym->n_value) <= ((UWord)csect_last)) { + if (0) { + VG_(printf)("LD: in a csect %p %p\n", + csect_first, csect_last); + VG_(printf)("CAND: %p .. %p %s\n", + (void*)sym->n_value, (void*)csect_last, + "fixme-Name-printing(1)" /*name*/); + } + cand.first = sym->n_value; + cand.last = (Addr)csect_last; + } else { + if (0) { + VG_(printf)("LD: can't compute csect bounds?!\n"); + VG_(printf)("CAND: %p .. %p %s\n", + (HChar*)sym->n_value, + (HChar*)sym->n_value+1, + "fixme-Name-printing(2)" /*name*/); + } + cand.first = sym->n_value; + cand.last = cand.first + 1; + } + + /* cand.first is a stated VMA; turn it into an actual VMA + and ignore it if not in the actual text segment. */ + + cand.first += text_bias; + cand.last += text_bias; + cand.name = name; + + if (cand.last < di->text_avma + || cand.first >= di->text_avma + di->text_size) + continue; + if (cand.last < cand.first) + continue; + if (is_empty_Name(name)) + continue; + + (void)VG_(addToXA)(syms, &cand); + } + } + /* --- END regular(ish) symbol --- */ + + } + + /* ---------------------------------------------------------- + Phase 2: suitable text symbols have been put into 'syms'. Their + start addresses are correct, but end addresses are those of the + containing csect, which is in general way too long. This phase + clips the ends so that the ranges no longer overlap, and thereby + constrains each symbol's range to something which, for the most + part, is correct. + ---------------------------------------------------------- */ + + nsyms = VG_(sizeXA)(syms); + + if (SHOW && SHOW_SYMS_P1) + VG_(printf)("Phase1 acquired %d text symbols\n", nsyms); + + if (SHOW && SHOW_SYMS_P2) { + VG_(printf)("--- BEGIN Phase2 (find text symbol ends) ---\n"); + VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n"); + } + + VG_(setCmpFnXA)(syms, cmp_XCoffSym_by_start_then_name); + VG_(sortXA)(syms); + + /* We only know for sure the start addresses (actual VMAs) of + symbols, and an overestimation of their end addresses. So sort + by start address, then clip each symbol so that its end address + does not overlap with the next one along. + + There is a small refinement: if a group of symbols have the same + address, treat them as a group: find the next symbol along that + has a higher start address, and clip all of the group + accordingly. This clips the group as a whole so as not to + overlap following symbols. This leaves prefersym() in + storage.c, which is not XCOFF-specific, to later decide which of + the symbols in the group to keep. + + Another refinement is that we need to get rid of symbols which, + after clipping, have identical starts, ends, and names. So the + sorting uses the name as a secondary key. + */ + + for (i = 0; i < nsyms; i++) { + for (k = i+1; + k < nsyms + && ((XCoffSym*)VG_(indexXA)(syms,i))->first + == ((XCoffSym*)VG_(indexXA)(syms,k))->first; + k++) + ; + /* So now [i .. k-1] is a group all with the same start address. + Clip their ending addresses so they don't overlap [k]. In + the normal case (no overlaps), k == i+1. */ + if (k < nsyms) { + XCoffSym* next = (XCoffSym*)VG_(indexXA)(syms,k); + for (m = i; m < k; m++) { + XCoffSym* here = (XCoffSym*)VG_(indexXA)(syms,m); + vg_assert(here->first < next->first); + if (here->last >= next->first) + here->last = next->first-1; + } + } + i = k-1; + vg_assert(i <= nsyms); + } + + j = 0; + if (nsyms > 0) { + j = 1; + for (i = 1; i < nsyms; i++) { + vg_assert(j <= i); + XCoffSym* s_j1 = (XCoffSym*)VG_(indexXA)(syms, j-1); + XCoffSym* s_j = (XCoffSym*)VG_(indexXA)(syms, j); + XCoffSym* s_i = (XCoffSym*)VG_(indexXA)(syms, i); + if (s_i->first != s_j1->first + || s_i->last != s_j1->last + || 0 != cmp_Names(s_i->name, s_j1->name)) { + *s_j = *s_i; + j++; + } else { + if (SHOW && SHOW_SYMS_P2) { + VG_(printf)("Phase2: dump duplicate "); + print_Name(s_i->name); + VG_(printf)("\n"); + } + } + } + } + vg_assert(j >= 0 && j <= nsyms); + VG_(dropTailXA)(syms, nsyms - j); + nsyms = j; + + if (1) { + for (i = 0; i < nsyms; i++) { + XCoffSym* s = (XCoffSym*)VG_(indexXA)(syms, i); + if (SHOW && SHOW_SYMS_P2) { + VG_(printf)("Phase2: %d 0x%lx 0x%lx ", + i, s->first, s->last); + print_Name(s->name); + VG_(printf)("\n"); + } + } + } + + /* ---------------------------------------------------------- + Phase 3: rescan the symbol table, looking for info on function + start/end line numbers and source file names. Generally + this will be absent for sources compiled without -g. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("--- BEGIN Phase3 (find src filenames " + "& fn start/end line #s) ---\n"); + VG_(printf)("--- note: shown addresses are STATED VMAs ---\n"); + } + + /* The lookupXAs in the C_FUN(.bf) part have to operate by + inclusion. Hence: */ + VG_(setCmpFnXA)(syms, cmp_XCoffSym_by_overlap); + VG_(sortXA)(syms); + + /* In this loop, p3currsym is maintained as a pointer to the most + recent XCoffSym identified as FCN(.bf) (function start). + Subsequent FCN(.ef) (function end) indications are compared + against said symbol. This assumes that function start/end + indications are not nested. */ + + XCoffSym* p3currsym = NULL; + + /* Maintain a stack of filenames. We allow the stack pointer to go + beyond the end, but obviously nothing is stored in this + imaginary part of the stack. */ + Name filenames[N_FILENAME_STACK]; + Int filenames_used = 1; + + Name name_unknown = mk_empty_Name(); + Name name_overflow = mk_const_Name("(filename_stack_overflow)"); + + for (i = 0; i < N_FILENAME_STACK; i++) + filenames[i] = name_unknown; + +# define FNAME_PUSH(_fname) \ + do { \ + vg_assert(filenames_used >= 1);\ + if (filenames_used < N_FILENAME_STACK)\ + filenames[filenames_used] = (_fname);\ + filenames_used++;\ + } while (0) + +# define FNAME_POP \ + do {\ + vg_assert(filenames_used >= 1);\ + if (filenames_used > 1 && filenames_used <= N_FILENAME_STACK) \ + filenames[filenames_used-1] = name_unknown; \ + if (filenames_used > 1)\ + filenames_used--;\ + } while (0) + +# define FNAME_GET_TOP \ + (filenames_used > N_FILENAME_STACK \ + ? name_overflow \ + : filenames[filenames_used-1]) + +# define FNAME_SET_TOP(_fname) \ + do {\ + vg_assert(filenames_used >= 1);\ + filenames[filenames_used-1] = (_fname);\ + } while (0) + + + i = 0; + while (1) { + + if (i >= oi_nent_symtab) + break; + + sym = SYM_IX(oi_symtab, i); + is_text = sntext_1based_if_known != -1 + && (Int)sym->n_scnum == sntext_1based_if_known; + is_data = sndata_1based_if_known != -1 + && (Int)sym->n_scnum == sndata_1based_if_known; + + if (0 && SHOW && SHOW_SYMS_P3) + VG_(printf)("Phase3: %5d+%d ", i, (Int)sym->n_numaux); + + name = mk_const_Name("(unknown)"); + if (sym->n_scnum == N_DEBUG && sym->n_sclass == C_FUN) + name = maybeDerefStrTab( sym, oi_debug, oi_n_debug ); + else + if (sym->n_sclass & DBXMASK) + name = mk_const_Name("(dbxstr)"); + else + name = maybeDerefStrTab( sym, oi_strtab, oi_n_strtab); + + if (0 && SHOW && SHOW_SYMS_P3) { + VG_(printf)("%5s(%2d) %6s 0x%016llx ", + is_text ? "text" : is_data ? "data" : "other", + (Int)sym->n_scnum, + name_of_syment_n_sclass(sym->n_sclass), + (ULong)sym->n_value); + print_Name(name); + VG_(printf)("\n"); + } + + i++; + i += sym->n_numaux; + + /* --- BEGIN C_FILE [source file] --- */ + /* There are two variants of C_FILE: a simple one with n_numaux + == 0, where the primary name is what we're after, and another + variant with n_numaux == 3, in which we have to hunt around + in the auxiliary entries to find the file name. gcc produces + exclusively the first kind, and xlc a mixture of both. */ + if (sym->n_sclass == C_FILE && sym->n_numaux == 0) { + if (!is_empty_Name(name)) + FNAME_SET_TOP(name); + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FILE ", + i-1-sym->n_numaux, (Int)sym->n_numaux ); + print_Name(name); + VG_(printf)("\n"); + } + continue; + } + if (sym->n_sclass == C_FILE && sym->n_numaux > 1 + && sym->n_numaux <= 5 /*stay sane*/) { + for (k = 0; k < sym->n_numaux; k++) { + aux = SYM_IX(oi_symtab, i - sym->n_numaux + k); + Name fname + = maybeDerefStrTab_fname( + (UChar*)&((AUXENT*)aux)->x_file.x_fname, + oi_strtab, oi_n_strtab); + if (((AUXENT*)aux)->x_file._x.x_ftype == XFT_FN) { + if (!is_empty_Name(fname)) + FNAME_SET_TOP(fname); + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FILE ", + i-1-sym->n_numaux, (Int)sym->n_numaux ); + print_Name(fname); + VG_(printf)("\n"); + } + break; + } + } + continue; + } + /* --- END C_FILE [source file] --- */ + + /* --- BEGIN C_BINCL [beginning of include] --- */ + if (sym->n_sclass == C_BINCL && sym->n_numaux == 0) { + FNAME_PUSH(name); + if (SHOW && SHOW_SYMS_P3) + VG_(printf)("Phase3: %5d+%d BINCL %s\n", + i-1-sym->n_numaux, (Int)sym->n_numaux, + "fixme-Name-printing(3)" /*name*/ ); + continue; + } + /* --- END C_BINCL [beginning of include] --- */ + + /* --- BEGIN C_EINCL [end of include] --- */ + if (sym->n_sclass == C_EINCL && sym->n_numaux == 0) { + FNAME_POP; + if (SHOW && SHOW_SYMS_P3) + VG_(printf)("Phase3: %5d+%d EINCL %s\n", + i-1-sym->n_numaux, (Int)sym->n_numaux, + "fixme-Name-printing(4)" /*name*/ ); + continue; + } + /* --- END C_EINCL [end of include] --- */ + + /* everything else that is interesting is in the text + section. */ + if (!is_text) + continue; + + /* --- BEGIN C_FCN(.bf) [function begin mark] --- */ + if (sym->n_sclass == C_FCN + && sym->n_numaux == 1 + && eq_string_Name(name, ".bf")) { + /* aux is BLOCK */ + aux = SYM_IX(oi_symtab, i-1); + Addr fn_start_avma = ((Addr)sym->n_value) + text_bias; + Int fn_start_lnno = ((AUXENT*)aux)->x_sym.x_misc.x_lnsz.x_lnno; + /* Look in 'syms' to see if we have anything for address + fn_avma. */ + XCoffSym key; + VG_(memset)(&key, 0, sizeof(key)); + key.first = fn_start_avma; + key.last = fn_start_avma; + Word ix_lo, ix_hi; + + /* Search for all symbols intersecting fn_start_avma. */ + Bool found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + /* All the 'syms' entries from ix_lo to ix_hi match. */ + + for (k = ix_lo; k <= ix_hi; k++) { + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,k); + + /* note the start line number */ + if (tsym->slnno == 0 && fn_start_lnno > 0) + tsym->slnno = fn_start_lnno; + + /* also the current filename, if we know it */ + if (is_empty_Name(tsym->fname) + && !is_empty_Name(FNAME_GET_TOP)) + tsym->fname = FNAME_GET_TOP; + + /* remember the first in the range as the new current + (I've never seen a range with > 1) */ + if (k == ix_lo) + p3currsym = tsym; + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FCN(.bf) 0x%016llx " + "lnno=%-4d ", + i-1-sym->n_numaux, (Int)sym->n_numaux, + (ULong)sym->n_value, + fn_start_lnno ); + print_Name(tsym->name); + VG_(printf)("\n"); + if (!is_empty_Name(tsym->fname)) { + VG_(printf)("Phase3: "); + print_Name(tsym->fname); + VG_(printf)("\n"); + } + } + } + } + continue; + } + /* --- END C_FCN(.bf) [function begin mark] --- */ + + /* --- BEGIN C_FCN(.ef) [function end mark] --- */ + if (sym->n_sclass == C_FCN + && sym->n_numaux == 1 + && eq_string_Name(name, ".ef")) { + /* aux is BLOCK */ + aux = SYM_IX(oi_symtab, i-1); + /* In this case the n_value field appears to give the address + of the first insn following the end of the function. + Hence the - 1. */ + Addr fn_end_avma = ((Addr)sym->n_value) + text_bias - 1; + Int fn_end_lnno = ((AUXENT*)aux)->x_sym.x_misc.x_lnsz.x_lnno; + + if (p3currsym + && fn_end_avma >= p3currsym->first + && fn_end_avma <= p3currsym->last) { + if (p3currsym->elnno == 0 && fn_end_lnno > 0) + p3currsym->elnno = fn_end_lnno; + if (SHOW && SHOW_SYMS_P3) { + VG_(printf)("Phase3: %5d+%d FCN(.ef) 0x%016llx " + "lnno=%-4d ", + i-1-sym->n_numaux, (Int)sym->n_numaux, + (ULong)sym->n_value, + fn_end_lnno ); + print_Name(p3currsym->name); + VG_(printf)("\n"); + } + if (fn_end_avma < p3currsym->last) { + /* also take the opportunity to trim the symbol's + length to something less than established by the + initial estimation done by Phases 1 and 2. */ + if (0) VG_(printf)("trim end from %#lx to %#lx\n", + p3currsym->last, fn_end_avma); + p3currsym->last = fn_end_avma; + } + } + continue; + } + /* --- END C_FCN(.ef) [function end mark] --- */ + + } + + /* ---------------------------------------------------------- + Phase 4: read and enumerate the line number entries, if + there are any. This depends on knowing the function start/end + line numbers established in Phase 3. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P4) { + VG_(printf)("--- BEGIN Phase4 (read line number info) ---\n"); + VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n"); + } + + /* Re-sort 'syms' using the compare-start-addresses ordering, so we + can use that in subsequent searches. */ + VG_(setCmpFnXA)(syms, cmp_XCoffSym_by_start); + VG_(sortXA)(syms); + + if (oi_lnos && oi_nent_lnos > 0) { + +# if defined(VGP_ppc32_aix5) + vg_assert(LINESZ == 6); /* XCOFF32 */ +# elif defined(VGP_ppc64_aix5) + vg_assert(LINESZ == 12); /* XCOFF64 */ +# else +# error "Unknown plat" +# endif + +# define LNO_IX(_tab,_n) \ + ((LINENO*)(((UChar*)(_tab)) + LINESZ * (_n))) + + /* Current fn that we are processing line numbers for */ + XCoffSym* p4currsym = NULL; + + /* SegInfo's string table pointer for p4currsym's file name. + Allocated on demand, so as not to waste space in the + SegInfo's string table. */ + UChar* si_fname_str = NULL; + + /* Ditto the directory name, if we can manage it. */ + UChar* si_dname_str = NULL; + + for (i = 0; i < oi_nent_lnos; i++) { + LINENO* lno = LNO_IX(oi_lnos,i); + + if (lno->l_lnno == 0) { + /* New fn. We get given the index in the symbol table of + the relevant function. It should be a C_EXT, C_WEAKEXT + or C_HIDEXT flavour, according to the IBM docs. */ + Int sym_ix = (Int)lno->l_addr.l_symndx; + sym = SYM_IX(oi_symtab, sym_ix); + if (!(sym->n_sclass == C_EXT + || sym->n_sclass == C_WEAKEXT + || sym->n_sclass == C_HIDEXT)) + return "readxcoff.c: invalid symbol reference" + " in line number info"; + /* For these 3 symbol kinds, the n_value field is the + symbol's stated VMA. Convert this to an actual VMA and + use that to find the associated XCoffSym. */ + Addr sym_avma = ((Addr)sym->n_value) + text_bias; + + XCoffSym key; + VG_(memset)(&key, 0, sizeof(key)); + key.first = sym_avma; + Word ix_lo, ix_hi; + + Bool found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + /* All the 'syms' entries from ix_lo to ix_hi match. + Just use the lowest (sigh ..) */ + p4currsym = (XCoffSym*)VG_(indexXA)(syms, ix_lo); + } else { + /* We can't find the relevant sym, but we still have to + wade through the line number info for this function + until we get to the starting record for the next + one. */ + p4currsym = NULL; + } + + /* If we decide to add any line info for this fn to the + SegInfo, we'll allocate this. Otherwise don't + bother. */ + si_fname_str = NULL; + si_dname_str = NULL; + + if (SHOW && SHOW_SYMS_P4) { + VG_(printf)("Phase4: new fn (%d found), avma 0x%016llx ", + (Int)(ix_hi-ix_lo+1), + (ULong)sym_avma ); + if (p4currsym) + print_Name(p4currsym->name); + else + VG_(printf)("UNKNOWN"); + VG_(printf)("\n"); + } + + } else { + /* Line number entry for the current fn. */ + if (!p4currsym) + continue; + Int line_no = (Int)(UInt)lno->l_lnno; + line_no += (p4currsym->slnno - 1); + Addr line_first_avma = ((Addr)lno->l_addr.l_paddr) + text_bias; + if (line_first_avma < p4currsym->first + || line_first_avma > p4currsym->last) + continue; + Addr line_last_avma = p4currsym->last; + /* Try to refine the last_avma by looking at the next + line's entry. */ + + /* XXX: TODO. What we have currently works only because + the generic line number canonicaliser truncates + overlapping address ranges in the way which we happen + to need anyway. */ + if (SHOW && SHOW_SYMS_P4) + VG_(printf)("Phase4: line %d 0x%016llx - 0x%016llx\n", + line_no, (ULong)line_first_avma, + (ULong)line_last_avma); + + /* This now has to be allocated. Try and figure out the + dir name at the same time. This is a bit ugly in that + it involves messing with the string after it's been + copied into the SegInfo's string table, but seems + harmless enough. */ + if ((!si_fname_str) && !is_empty_Name(p4currsym->fname)) { + si_dname_str = NULL; + si_fname_str = ML_(addStr)(di, p4currsym->fname.vec, + p4currsym->fname.len); + UChar* lastslash = VG_(strrchr)(si_fname_str, '/'); + if (lastslash) + vg_assert(lastslash[0] == '/'); + if (lastslash[1] != 0) { + si_dname_str = si_fname_str; + lastslash[0] = 0; /* replace the / with a NUL + terminator */ + si_fname_str = lastslash+1; + if (0) VG_(printf)("XXX %s %s\n", si_dname_str, + si_fname_str); + } + } + /* finally .. */ + if (line_no >= 0) + ML_(addLineInfo)(di, si_fname_str, si_dname_str, + line_first_avma, line_last_avma+1, + line_no, i/*debugging only*/); + } + } + +# undef LNO_IX + } + +#if defined(OFFICIAL_PHASE5) + /* ---------------------------------------------------------- + Phase 5: Do another trawl of the XCOFF symbol table, looking + for TOC entries for the entries we've already placed in 'syms'. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)("--- BEGIN official Phase5 (find TOC pointers) ---\n"); + + Bool is_cfun; + + i = 0; + while (1) { + + if (i >= oi_nent_symtab) + break; + + sym = SYM_IX(oi_symtab, i); + is_text = sntext_1based_if_known != -1 + && (Int)sym->n_scnum == sntext_1based_if_known; + is_data = sndata_1based_if_known != -1 + && (Int)sym->n_scnum == sndata_1based_if_known; + is_cfun = sym->n_scnum == N_DEBUG + && sym->n_sclass == C_FUN; + + i++; + i += sym->n_numaux; + + if (!is_cfun && !is_data) + continue; + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)("Phase5o: %5d+%d ", i-1-sym->n_numaux, + (Int)sym->n_numaux); + + name = mk_const_Name("(unknown)"); + if (is_cfun) + name = maybeDerefStrTab( sym, oi_debug, oi_n_debug ); + else + if (sym->n_sclass & DBXMASK) + name = mk_const_Name("(dbxstr)"); + else + name = maybeDerefStrTab( sym, oi_strtab, oi_n_strtab); + + if (SHOW && SHOW_SYMS_P5) { + VG_(printf)("%5s(%2d) %6s svma 0x%016llx ", + is_text ? "text" : is_data ? "data" : "other", + (Int)sym->n_scnum, + name_of_syment_n_sclass(sym->n_sclass), + (ULong)sym->n_value); + print_Name(name); + VG_(printf)("\n"); + } + + Addr avma = (Addr)sym->n_value + data_bias; + if (0) VG_(printf)("data sym: avma %p, limits %p-%p\n", + avma, data_avma,data_avma + data_alen); + + /* Does avma point to 3 valid words inside the actual data + segment? iow, can it possibly be a valid function + descriptor? If not, move on. */ + if (! (avma >= data_avma + && avma + 3 * sizeof(Word) <= data_avma + data_alen) ) + continue; + + UWord* fndescr = (UWord*)avma; + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)(" fndescr = {0x%lx,0x%lx}\n", + fndescr[0], fndescr[1]); + + /* Another check: fndescr[0], the entry point, must point inside + the actual text segment. Discard any that don't. */ + + Addr fndescr_0 = (Addr)fndescr[0]; + if (fndescr_0 < si->text_avma + || fndescr_0 >= si->text_avma+si->text_size) + continue; + + /* Let's suppose that fndescr is the descriptor for a + function with name NAME. If that's so, then 'syms' + acquired by stage 2 should have an entry of name '.NAME' + whose address is fndescr[0]. If so, then fndescr[1] must + be the relevant r2 value for it. */ + /* Look in 'syms' to see if we have anything for address + fndescr[0]. */ + XCoffSym key; + VG_(memset)(&key, 0, sizeof(key)); + key.first = fndescr_0; + Word ix_lo, ix_hi; + Bool found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + /* So all the 'syms' entries from ix_lo to ix_hi have an + address which matches the entry point address stated in + this descriptor. For each one, as a final sanity + check, see if the 'syms' entry has a name .NAME where + NAME is that of the data symbol currently under + consideration. If so, it's a pretty good bet that this + descriptor matches the text symbol we already have, and + so we have a valid tocptr value from fndescr[1]. */ + for (k = ix_lo; k <= ix_hi; k++) { + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,k); + vg_assert(!is_empty_Name(tsym->name)); + /* VG_(printf)("cmp %s %s\n", name, tsym->name); */ + /* VG_(printf)("found matching %d %s\n", k, tsym->name); */ + if (tsym->name.len == 1 + name.len + && tsym->name.vec[0] == '.' + && 0 == VG_(memcmp)(&tsym->name.vec[1], + &name.vec[0], name.len)) { + Addr r2val = fndescr[1]; + if (tsym->r2known) { + if (tsym->r2value != r2val) + /* COMPLAIN - conflicting r2 values*/ ; + } else { + tsym->r2known = True; + tsym->r2value = r2val; + } + } + } + } + + } + +#else /* !defined(OFFICIAL_PHASE5) */ + /* ---------------------------------------------------------- + Alternative kludgey Phase 5: find TOC entries for 'syms' by the + blunt-instrument approach of scanning the actual data section + and noting anything that looks like a function descriptor. + This is dangerous in the sense that if there are any 3 word + structs which are not real function descriptors but just happen + to look like them, then those will be included too. + Seems unlikely though. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P5) + VG_(printf)("--- BEGIN kludged Phase5 (find TOC pointers) ---\n"); + + if (SHOW) + VG_(printf)("Phase5: actual data segment: %#lx %#lx\n", + di->data_avma, di->data_avma + di->data_size); + + /* Skip obviously-missing data sections. */ + if (di->data_avma != 0 && di->data_size >= sizeof(UWord)) { + + /* set up for inspecting all the aligned words in the actual + data section. */ + + Addr tmp = di->data_avma; + while (tmp & (sizeof(UWord)-1)) + tmp++; + + UWord* first_data_word = (UWord*)tmp; + tmp = di->data_avma + di->data_size - sizeof(UWord); + while (tmp & (sizeof(UWord)-1)) + tmp--; + UWord* last_data_word = (UWord*)tmp; + + if (SHOW) + VG_(printf)("Phase5: data segment conservatively aligned %p %p\n", + first_data_word, last_data_word); + + UWord* wP = first_data_word; + UWord w; + + while (True) { + + XCoffSym key; + Word ix_lo, ix_hi; + Bool found; + + if (& wP[2] > last_data_word) + break; /* no space left for a 3-word descriptor */ + + w = wP[0]; + if (!(w >= di->text_avma + && w < di->text_avma + di->text_size)) { + wP++; + continue; /* entry pointer is not to text segment */ + } + + w = wP[1]; + if (!(w >= di->data_avma && w < di->data_avma + di->data_size)) { + wP++; + if (SHOW && SHOW_SYMS_P5) { + VG_(memset)(&key, 0, sizeof(key)); + key.first = wP[0]; + found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + vg_assert(ix_lo <= ix_hi); + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,ix_lo); + VG_(printf)("Phase5: bad tocptc at 0x%016llx={", + (ULong)(UWord)(wP-1)); + print_Name(tsym->name); + VG_(printf)(",%p}\n", (void*)w); + } + } + continue; /* r2 value does not point to data segment */ + } + + /* ok, so wP might be a valid fn descr. But does it point to + a text symbol we know about? Look in 'syms' to see if we + have anything for wP[0]. */ + VG_(memset)(&key, 0, sizeof(key)); + key.first = wP[0]; + found = VG_(lookupXA)(syms, &key, &ix_lo, &ix_hi); + if (found) { + for (k = ix_lo; k <= ix_hi; k++) { + XCoffSym* tsym = (XCoffSym*)VG_(indexXA)(syms,k); + Addr r2val = wP[1]; + if (tsym->r2known) { + if (tsym->r2value != r2val) + /* COMPLAIN - conflicting r2 values*/ ; + } else { + tsym->r2known = True; + tsym->r2value = r2val; + if (SHOW && SHOW_SYMS_P5) { + VG_(printf)("Phase5: found tocptr 0x%016llx for ", + (ULong)r2val); + print_Name(tsym->name); + VG_(printf)("\n"); + } + } + } + } + + wP++; + } + } + +#endif /* defined(OFFICIAL_PHASE5) */ + + /* ---------------------------------------------------------- + Phase 6: trivial: copy the syms out of 'syms' into the + generic debuginfo tables, and free up 'syms'. + ---------------------------------------------------------- */ + + if (SHOW && SHOW_SYMS_P6) { + VG_(printf)("--- BEGIN Phase6 (finalise symbol info) ---\n"); + VG_(printf)("--- note: shown addresses are ACTUAL VMAs ---\n"); + } + + for (i = 0; i < nsyms; i++) { + DiSym dis; + XCoffSym* s = (XCoffSym*)VG_(indexXA)(syms, i); + Addr addr = s->first; + UWord size = s->last + 1 - s->first; + Bool guessed_toc = False; + + /* If everything worked right, the symbol should fall within the + mapped text segment. Hence .. */ + Bool sane = addr >= di->text_avma + && addr+size <= di->text_avma + di->text_size; + + if (SHOW && SHOW_SYMS_P6) { + VG_(printf)("Phase6: %s %3d 0x%08lx-0x%08lx 0x%08lx ", + sane ? " " : "BAD", + i, + addr, + addr + size - 1, + s->r2known ? s->r2value : 0 ); + print_Name(s->name); + VG_(printf)("\n"); + } + +# if defined(VGP_ppc64_aix5) + /* 64-bit kludge: if we can't find a plausible toc ptr just use + the one specified in the XCOFF auxiliary header. */ + if ((!s->r2known) + && toc_avma != 0 + && s->name.len > 8 + && 0==VG_(strncmp)(&s->name.vec[0], "._vgwZU_", 8)) { + s->r2known = True; + s->r2value = toc_avma; + guessed_toc = True; + if (SHOW && SHOW_SYMS_P6) + VG_(printf)("Phase6: assuming toc 0x%08lx for above sym\n", + s->r2value); + } +# endif + + /* Actually add the symbol (finallyatlast) */ + if (sane) { + UInt nlen; + dis.addr = addr; + dis.size = size; + dis.tocptr = s->r2known ? s->r2value : 0; + dis.isText = True; + vg_assert(!is_empty_Name(s->name)); + nlen = s->name.len; + vg_assert(nlen > 0); + if (s->name.vec[0] == '.') + dis.name = ML_(addStr)(di, &s->name.vec[1], nlen-1 ); + else + dis.name = ML_(addStr)(di, &s->name.vec[0], nlen-0 ); + ML_(addSym)( di, &dis ); + if (0 && s->r2known) + VG_(printf)("r2 known for %s\n", + "fixme-Name-printing(5)" /*s->name*/ ); + + if (guessed_toc) + VG_(message)(Vg_DebugMsg, "WARNING: assuming toc 0x%lx for %s", + s->r2value, dis.name); + } + } + + /* Free up the XA */ + VG_(deleteXA)(syms); + +# undef SYM_IX + + return NULL; /*success*/ +} + + +static void show_loader_section ( struct _DebugInfo* di, + UChar* oi_start, UWord size ) +{ + Int i, j; + LDHDR* hdr = (LDHDR*)oi_start; + UChar* strtab_import = NULL; + UChar* strtab_other = NULL; + if (SHOW) { + VG_(printf)(" l_version %llu\n", (ULong)hdr->l_version); + VG_(printf)(" l_nsyms %lld\n", (Long)hdr->l_nsyms); + VG_(printf)(" l_nreloc %lld\n", (Long)hdr->l_nreloc); + VG_(printf)(" l_istlen (i st len) %lld\n", (Long)hdr->l_istlen); + VG_(printf)(" l_impoff (i st off) %llu\n", (ULong)hdr->l_impoff); + VG_(printf)(" l_nimpid (# imps) %llu\n", (ULong)hdr->l_nimpid); + VG_(printf)(" l_stlen (st len) %llu\n", (ULong)hdr->l_stlen); + VG_(printf)(" l_stoff (st off) %llu\n", (ULong)hdr->l_stoff); + } + + if (hdr->l_istlen > 0) + strtab_import = oi_start + hdr->l_impoff; + if (hdr->l_stlen > 0) + strtab_other = oi_start + hdr->l_stoff; + + if (strtab_import) { + if (SHOW) + VG_(printf)(" Loader Import String Table: %llu bytes\n", + (ULong)hdr->l_istlen); + i = 0; + j = 0; + while (1) { + if (i >= hdr->l_istlen) + break; + if (SHOW && SHOW_LD_STRTAB) + VG_(printf)(" %3d%s ", i, (j%3)==0 ? "::" : " "); + j++; + while (i < hdr->l_istlen && strtab_import[i]) { + if (SHOW && SHOW_LD_STRTAB) + VG_(printf)("%c", sanitiseChar(strtab_import[i])); + i++; + } + i++; + if (SHOW && SHOW_LD_STRTAB) + VG_(printf)("\n"); + } + } + + if (strtab_other) { + if (SHOW) + VG_(printf)(" Loader Other String Table: %llu bytes\n", + (ULong)hdr->l_stlen); + i = 0; + while (1) { + int len = 0; + if (i+1 >= hdr->l_stlen) + break; + len = (unsigned char)strtab_other[i]; + len <<= 8; + len |= (unsigned char)strtab_other[i+1]; + i += 2; + if (i >= hdr->l_stlen) + break; + if (SHOW && SHOW_LD_STRTAB) + VG_(printf)(" %2d len %2d ", i, len); + while (len >= 0 && i < hdr->l_stlen && strtab_other[i]) { + if (SHOW && SHOW_LD_STRTAB) + VG_(printf)("%c", sanitiseChar(strtab_other[i])); + i++; + len--; + } + i++; + if (SHOW && SHOW_LD_STRTAB) + VG_(printf)("\n"); + } + } + + if (SHOW) + VG_(printf)(" Loader Symbol Table: %lld entries\n", (Long)hdr->l_nsyms); + LDSYM* sym = (LDSYM*)(oi_start + sizeof(LDHDR)); + for (i = 0; i < hdr->l_nsyms; i++) { + Name name = maybeDerefStrTab( (SYMENT*)&sym[i], + strtab_other, hdr->l_stlen ); + if (SHOW && SHOW_LD_SYMTAB) { + VG_(printf)(" %2d: %016llx sec %d ty 0x%02x " + "scla 0x%02x itab %d ", + i, (ULong)sym[i].l_value, (Int)sym[i].l_scnum, + (Int)sym[i].l_smtype, (Int)sym[i].l_smclas, + (Int)sym[i].l_ifile); + print_Name(name); + VG_(printf)("\n"); + } + } + +# if defined(VGP_ppc32_aix5) + vg_assert(sizeof(LDREL) == 12); +# elif defined(VGP_ppc64_aix5) + vg_assert(sizeof(LDREL) == 16); +# else +# error Unknown platform +# endif + + LDREL* rel = (LDREL*)(&sym[hdr->l_nsyms]); + if (SHOW) + VG_(printf)(" Loader Relocation Table: %lld entries\n", + (Long)hdr->l_nreloc); + for (i = 0; i < hdr->l_nreloc; i++) { + if (SHOW && SHOW_LD_RELTAB) + VG_(printf)(" %3d: va %016llx sym %2lld rty 0x%4x sec %2d\n", + i, (ULong)rel[i].l_vaddr, (Long)rel[i].l_symndx, + (Int)rel[i].l_rtype, (Int)rel[i].l_rsecnm); + } + + if (SHOW) + VG_(printf)("\n"); +} + + +/* Returns True on success, False on any kind of error. + + The object file from which to read symbols is mapped temporarily at + [oimage .. oimage + n_oimage). + + The VMA of where the relevant text section really got loaded (the + "actual VMA", _avma) is [si->text_avma .. si->text_avma + + si->text_size). + + The VMA of the associated data section really got loaded + (the "actual VMA", _avma) is [data_avma .. data_avma + data_alen). + + We will need to peer at the loaded data section in order to make + sense of TOC entries, hence we need to be assured it is mapped and + readable. m_aspacemgr should have given us that assurance, in the + sense that data_avma/data_alen will be save to read in by the time + we get here. +*/ +static +Bool read_xcoff_mapped_object ( struct _DebugInfo* di, + UChar* oimage, UWord n_oimage ) +{ +#define BAD(_msg) do { ML_(symerr)(di, True/*serious*/,_msg); \ + return False; } while (0) + + Int i, j; + + /* The first byte after the oimage - we can't go here */ + UChar* oimage_after = oimage + n_oimage; + + UChar* cursor = oimage; + + /* ------------ File Header ------------ */ +# if defined(VGP_ppc32_aix5) + if (sizeof(FILHDR) != 20) + BAD("readxcoff.c: invalid FILHDR size (32-bit)"); +# elif defined(VGP_ppc64_aix5) + if (sizeof(FILHDR) != 24) + BAD("readxcoff.c: invalid FILHDR size (64-bit)"); +# else +# error "Invalid platform" +# endif + + if (n_oimage < sizeof(FILHDR)) + BAD("readxcoff.c: XCOFF object file header is implausibly small (2)"); + + FILHDR* t_filehdr = (FILHDR*)cursor; + cursor += sizeof(FILHDR); + + if (SHOW) { + VG_(printf)("\nFile Header:\n"); + VG_(printf)(" magic 0x%04x (%s)\n", + (UInt)t_filehdr->f_magic, + name_of_filhdr_f_magic(t_filehdr->f_magic)); + } + +# if defined(VGP_ppc32_aix5) + if (t_filehdr->f_magic != 0x01DF /* XCOFF32 */) + BAD("readxcoff.c: XCOFF32 object file header has invalid magic"); +# elif defined(VGP_ppc64_aix5) + if (t_filehdr->f_magic != 0x01F7 /* XCOFF64 */) + BAD("readxcoff.c: XCOFF64 object file header has invalid magic"); +# else +# error "Invalid platform" +# endif + + if (SHOW) { + VG_(printf)(" # of sections %u\n", (UInt)t_filehdr->f_nscns); + VG_(printf)(" time/date 0x%08llx\n", (ULong)t_filehdr->f_timdat); + VG_(printf)(" symtab foffset %llu\n", (ULong)t_filehdr->f_symptr); + VG_(printf)(" # symtab entries %llu\n", (ULong)t_filehdr->f_nsyms); + VG_(printf)(" size of aux hdr %llu\n", (ULong)t_filehdr->f_opthdr); + VG_(printf)(" flags 0x%04x\n", (UInt)t_filehdr->f_flags); + if (t_filehdr->f_flags) { + VG_(printf)(" "); + if (t_filehdr->f_flags & F_RELFLG) VG_(printf)("NoRelocInfo "); + if (t_filehdr->f_flags & F_EXEC) VG_(printf)("IsExec "); + if (t_filehdr->f_flags & F_LNNO) VG_(printf)("NoLineInfo "); + if (t_filehdr->f_flags & F_LSYMS) VG_(printf)("LSYMS "); + if (t_filehdr->f_flags & F_FDPR_PROF) VG_(printf)("FDPR_PROF "); + if (t_filehdr->f_flags & F_FDPR_OPTI) VG_(printf)("FDPR_OPTI "); + if (t_filehdr->f_flags & F_DSA) VG_(printf)("LargeProc "); +# if defined(F_DEP_1) + if (t_filehdr->f_flags & F_DEP_1) VG_(printf)("DEP_1 "); +# endif +# if defined(F_VARPG) + if (t_filehdr->f_flags & F_VARPG) VG_(printf)("VARPG "); +# endif + if (t_filehdr->f_flags & F_LPTEXT) VG_(printf)("LPTEXT "); + if (t_filehdr->f_flags & F_LPDATA) VG_(printf)("LPDATA "); + if (t_filehdr->f_flags & F_DYNLOAD) VG_(printf)("Dynamic "); + if (t_filehdr->f_flags & F_SHROBJ) VG_(printf)("SharedObj "); + if (t_filehdr->f_flags & F_LOADONLY) VG_(printf)("LOADONLY "); +# if defined(F_DEP_2) + if (t_filehdr->f_flags & F_DEP_2) VG_(printf)("DEP_2 "); +# endif + VG_(printf)("\n"); + } + } + + /* ------------ Auxiliary Header ------------ */ +# if defined(VGP_ppc32_aix5) + if (sizeof(AOUTHDR) != 72) + BAD("readxcoff.c: invalid AOUTHDR size (32-bit)"); +# elif defined(VGP_ppc64_aix5) + if (sizeof(AOUTHDR) != 120) + BAD("readxcoff.c: invalid AOUTHDR size (64-bit)"); +# else +# error "Invalid platform" +# endif + + Int sntext_1based_if_known = -1; + Int sndata_1based_if_known = -1; + + Addr data_svma = 0; /* stated VMA of data section, if known */ + Bool data_svma_known = False; + Word data_bias = 0; + UWord data_alen_from_auxhdr = 0; + + Addr text_svma = 0; /* stated VMA of text section, if known */ + Bool text_svma_known = False; + Word text_bias = 0; + + Addr toc_avma = 0; /* actual VMA of toc, if known */ + Addr toc_svma = 0; /* stated VMA of toc, if known */ + Addr toc_svma_known = False; + + AOUTHDR* t_auxhdr = NULL; + if (t_filehdr->f_opthdr > 0) { + t_auxhdr = (AOUTHDR*)cursor; + cursor += sizeof(AOUTHDR); + sntext_1based_if_known = (Int)t_auxhdr->o_sntext; + sndata_1based_if_known = (Int)t_auxhdr->o_sndata; + + if (SHOW) { + VG_(printf)("\nAuxiliary Header\n"); + VG_(printf)(" magic 0x%04x (should be 0x010b)\n", + (UInt)t_auxhdr->magic); + VG_(printf)(" vstamp 0x%04x\n", (UInt)t_auxhdr->vstamp); + VG_(printf)(" tsize %lld\n", (Long)t_auxhdr->tsize); + VG_(printf)(" dsize %lld\n", (Long)t_auxhdr->dsize); + VG_(printf)(" bsize %lld\n", (Long)t_auxhdr->bsize); + VG_(printf)(" entry 0x%llx\n", (ULong)t_auxhdr->entry); + VG_(printf)(" text_start 0x%llx (stated)\n", + (ULong)t_auxhdr->text_start); + VG_(printf)(" data_start 0x%llx (stated)\n", + (ULong)t_auxhdr->data_start); + VG_(printf)(" o_toc 0x%llx\n", (ULong)t_auxhdr->o_toc); + VG_(printf)(" o_snentry %d\n", (Int)t_auxhdr->o_snentry); + VG_(printf)(" o_sntext %d\n", (Int)t_auxhdr->o_sntext); + VG_(printf)(" o_sndata %d\n", (Int)t_auxhdr->o_sndata); + VG_(printf)(" o_sntoc %d\n", (Int)t_auxhdr->o_sntoc); + VG_(printf)(" o_snloader %d\n", (Int)t_auxhdr->o_snloader); + VG_(printf)(" o_snbss %d\n", (Int)t_auxhdr->o_snbss); + VG_(printf)(" o_algntext %d\n", (Int)t_auxhdr->o_algntext); + VG_(printf)(" o_algndata %d\n", (Int)t_auxhdr->o_algndata); + VG_(printf)(" o_modtype \"%c%c\"\n", + (UChar)t_auxhdr->o_modtype[0], + (UChar)t_auxhdr->o_modtype[1] ); + VG_(printf)(" o_cpuflag 0x%02x\n", (UInt)t_auxhdr->o_cpuflag); + VG_(printf)(" o_cputype 0x%02x\n", (UInt)t_auxhdr->o_cputype); + VG_(printf)(" o_maxstack %llu\n", (ULong)t_auxhdr->o_maxstack); + VG_(printf)(" o_maxdata %llu\n", (ULong)t_auxhdr->o_maxdata); + VG_(printf)(" o_debugger %u\n", t_auxhdr->o_debugger); + /* printf(" o_textpsize %u\n", (UInt)t_auxhdr->o_textpsize); */ + /* printf(" o_stackpsize %u\n", (UInt)t_auxhdr->o_stackpsize); */ + } + + text_svma = t_auxhdr->text_start; + text_svma_known = True; + + data_svma = t_auxhdr->data_start; + data_svma_known = True; + + /* The auxhdr may claim the data section is longer than + data_alen, so note the auxhdr-claimed size too. */ + data_alen_from_auxhdr = (UWord)t_auxhdr->dsize; + + if (t_auxhdr->o_sntoc == t_auxhdr->o_sndata) { + toc_svma = (Addr)t_auxhdr->o_toc; + toc_svma_known = True; + } + } + + /* ------------ Section Headers ------------ */ +# if defined(VGP_ppc32_aix5) + if (sizeof(SCNHDR) != 40) + BAD("readxcoff.c: invalid SCNHDR size (32-bit)"); +# elif defined(VGP_ppc64_aix5) + if (sizeof(SCNHDR) != 72) + BAD("readxcoff.c: invalid SCNHDR size (64-bit)"); +# else +# error "Invalid platform" +# endif + + SCNHDR* t_scnhdr = (SCNHDR*)cursor; + + if (SHOW) + VG_(printf)("\nSection Headers: %d entries\n", t_filehdr->f_nscns); + + /* Where the stabs strings are in the oimage */ + UChar* oi_debug = NULL; + UWord oi_n_debug = 0; + + /* Where the line number entries for the text section are + in the oimage */ + UChar* oi_lnos = NULL; + UWord oi_nent_lnos = 0; /* number of records */ + + for (i = 0; i < t_filehdr->f_nscns; i++) { + UChar sname_safe[9]; + for (j = 0; j < 8; j++) + sname_safe[j] = t_scnhdr[i].s_name[j]; + sname_safe[8] = 0; + if (SHOW) { + VG_(printf)(" --- #%d ---\n", i); + VG_(printf)(" s_name %s\n", sname_safe); + VG_(printf)(" s_paddr 0x%llx\n", (ULong)t_scnhdr[i].s_paddr); + VG_(printf)(" s_vaddr 0x%llx\n", (ULong)t_scnhdr[i].s_vaddr); + VG_(printf)(" s_size %lld\n", (Long)t_scnhdr[i].s_size); + VG_(printf)(" s_scnptr %lld\n", (Long)t_scnhdr[i].s_scnptr); + VG_(printf)(" s_relptr %lld\n", (Long)t_scnhdr[i].s_relptr); + VG_(printf)(" s_lnnoptr %lld\n", (Long)t_scnhdr[i].s_lnnoptr); + VG_(printf)(" s_nreloc %llu\n", (ULong)t_scnhdr[i].s_nreloc); + VG_(printf)(" s_nlnno %llu\n", (ULong)t_scnhdr[i].s_nlnno); + VG_(printf)(" s_flags 0x%llx (%s)\n", + (ULong)t_scnhdr[i].s_flags, + name_of_scnhdr_s_flags(t_scnhdr[i].s_flags)); + } + /* find the stabs strings */ + if (t_scnhdr[i].s_flags == STYP_DEBUG) { + oi_debug = oimage; + oi_debug += (UWord)t_scnhdr[i].s_scnptr; + oi_n_debug = (UWord)t_scnhdr[i].s_size; + } + /* find the line number entries for the text section */ + if (t_scnhdr[i].s_flags == STYP_TEXT && t_scnhdr[i].s_lnnoptr > 0) { + oi_lnos = oimage; + oi_lnos += (UWord)t_scnhdr[i].s_lnnoptr; + oi_nent_lnos = (UWord)t_scnhdr[i].s_nlnno; + /* XCOFF is clearly the result of years of kludgery, and + here's one place it shows. .s_nlnno is a 16-bit field, so + if there are 65535 or more entries, they can't be + represented here. In that case, the real number is stored + in a 32-bit field of a an "overflow section header" - a + dummy section header which has no purpose other than to + hold the correct count. And then this kludge applies to + XCOFF32, not XCOFF64. */ + if (t_scnhdr[i].s_nlnno == 0xFFFF + || t_scnhdr[i].s_nreloc == 0xFFFF) { + /* have to test both fields, according to the docs */ + /* find the relevant overflow header */ + for (j = 0; j < t_filehdr->f_nscns; j++) + if (t_scnhdr[j].s_flags == STYP_OVRFLO + && t_scnhdr[j].s_nlnno == i+1 /* ref to correct scn? */ + && t_scnhdr[j].s_nreloc == i+1 /* also must check this */) + break; + vg_assert(j >= 0 && j <= t_filehdr->f_nscns); + if (j == t_filehdr->f_nscns) + /* Hmm. We're hosed. Give up. */ + BAD("readxcoff.c: can't find a required " + "overflow section header"); + /* finally, we have the real count. */ + oi_nent_lnos = (UWord)t_scnhdr[j].s_vaddr; + } + } + cursor += sizeof(SCNHDR); + } + if (SHOW) { + VG_(printf)("\n debug image (stabs strings) at %p size %ld bytes\n", + oi_debug, oi_n_debug); + VG_(printf)(" line number info at %p with %ld entries\n", + oi_lnos, oi_nent_lnos); + } + + /* ------------ establish Text/data biases ------------ */ + + /* Calculate, into text_bias, the offset that has to be added to + symbol table values (stated VMAs) so as to convert them to correct + addresses in the running image (actual VMAs). I can't find any + documentation for this, so the following is determined empirically. + + There appear to be two classes of loaded object: + + .o files. These have a stated text VMA of zero, and so their + symbols start from zero and work upwards. In that case the + bias is precisely the offset where the text section is + loaded (si->text_avma), that is, the actual text VMA. + + Except -- cryptically -- /usr/include/sys/ldr.h says that the + ld_info.ldinfo_textorg field is "start of loaded program + image (includes the XCOFF headers)". And so to get the + correct text bias it is necessary (determined empirically) to + add on the file offset for the text section. I guess this + means that (1) it is assumed the text section is always the + first in the file, and (2) in this case the stated text VMA + is where the start of the file is mapped, not the start of + the text section. + + Last verified 24 May 06. + + .so files, and executables. These have a non-zero stated text + VMA, for example 0x10000150. They appear to get loaded at some + arbitrary address (actual VMA) which is always a whole number + of pages, eg 0x20002000, and in such a way that the offset is + a whole number of pages. So in this example the offset (bias) + would be 0x20002000 - round_to_page_base(0x10000150). + */ + if (text_svma_known) { +#if 0 + if (text_svma == 0) { + text_bias = di->text_avma; + if (sntext_1based_if_known >= 1 + && sntext_1based_if_known <= t_filehdr->f_nscns) + text_bias += t_scnhdr[sntext_1based_if_known - 1].s_scnptr; + } else { + text_bias = di->text_avma - VG_PGROUNDDN(text_svma); + } +#else + text_bias = di->text_avma - text_svma; + if (sntext_1based_if_known >= 1 + && sntext_1based_if_known <= t_filehdr->f_nscns) + text_bias += t_scnhdr[sntext_1based_if_known - 1].s_scnptr; + +#endif + if (SHOW) + VG_(printf)(" text section: stated vma 0x%lx, " + "actual vma 0x%lx, bias 0x%lx\n", + text_svma, di->text_avma, text_bias); + } else { + text_bias = 0; + if (SHOW) + VG_(printf)(" text section: svma UNKNOWN, bias UNKNOWN\n"); + } + + if (data_svma_known) { + data_bias = di->data_avma - data_svma; + if (SHOW) + VG_(printf)(" data section: stated vma 0x%lx, " + "actual vma 0x%lx, bias 0x%lx\n", + data_svma, di->data_avma, data_bias); + } else { + data_bias = 0; + if (SHOW) + VG_(printf)(" data section: svma UNKNOWN, bias UNKNOWN\n"); + } + + if (toc_svma_known) { + toc_avma = toc_svma + data_bias; + if (SHOW) + VG_(printf)(" toc: stated vma 0x%lx, actual vma 0x%lx\n", + toc_svma, toc_avma); + } else { + if (SHOW) + VG_(printf)(" toc: svma UNKNOWN\n"); + toc_avma = 0; + } + + /* ------------ Section Data ------------ */ + for (i = 0; i < t_filehdr->f_nscns; i++) { + if (SHOW) + VG_(printf)("\nSection Data (sec %d, \"%s\")\n", + i, name_of_scnhdr_s_flags(t_scnhdr[i].s_flags) ); + switch (t_scnhdr[i].s_flags & 0xFFFF) { + case STYP_LOADER: + show_loader_section( di, oimage + t_scnhdr[i].s_scnptr, + t_scnhdr[i].s_size ); + break; + default: + if (SHOW) + VG_(printf)(" Not handled yet\n"); + break; + } + } + + /* ------------ establish String Table ------------ */ + /* This is after the symbol table, if it exists at all. */ + /* This is a bit of a hack. The easy way to find the string table + is assume it immediately follows the symbol table. That doesn't + work if there is no symbol table; but on the other hand if there + is no symbol table then there isn't much point in carrying on. + Hence, if there is no symbol table we just give up here and + claim to have successfully loaded zero symbols. */ + if (t_filehdr->f_nsyms == 0) { + if (SHOW) + VG_(printf)("Object contains no symbols. Stopping here.\n"); + return True; + } + + cursor = oimage; + cursor += t_filehdr->f_symptr; /* symtab start */ + cursor += SYMESZ * t_filehdr->f_nsyms; /* strtab start */ + /* Does this fall inside the file image? The first 4 bytes is the + string table size, so we need to be able to see at least + them. */ + UChar* oi_strtab = NULL; + UWord oi_n_strtab = 0; + if (cursor + 4 <= oimage_after) { + oi_strtab = cursor; + oi_n_strtab = (UWord)( *(UInt*)oi_strtab ); + if (0) { + VG_(printf)("oimage %p\n", oimage); + VG_(printf)("oimage_after %p\n", oimage_after); + VG_(printf)("cursor %p\n", cursor); + } + if (oi_strtab + oi_n_strtab > oimage_after) + BAD("readxcoff.c: string table exceeds image end"); + } + + /* ------------ Symbol Table ------------ */ + if (SHOW) + VG_(printf)("\nSymbol Table: %llu entries\n", (ULong)t_filehdr->f_nsyms); + cursor = oimage; + cursor += t_filehdr->f_symptr; + HChar* badness = read_symbol_table( + di, + cursor, t_filehdr->f_nsyms, + oi_strtab, oi_n_strtab, + oi_debug, oi_n_debug, + oi_lnos, oi_nent_lnos, + sntext_1based_if_known, sndata_1based_if_known, + data_alen_from_auxhdr, + toc_avma, + text_bias, data_bias + ); + if (badness) + BAD(badness); + /* cursor not used after this point */ + + /* ------------ String Table ------------ */ + if (oi_strtab) { + if (SHOW) + VG_(printf)("\nString Table: %lu bytes\n", oi_n_strtab); + i = 4; + while (1) { + if (i >= oi_n_strtab) + break; + if (SHOW && SHOW_STRTAB) + VG_(printf)(" %5d ", i); + while (i < oi_n_strtab && oi_strtab[i]) { + if (SHOW && SHOW_STRTAB) + VG_(printf)("%c", sanitiseChar(oi_strtab[i])); + i++; + } + i++; + if (SHOW && SHOW_STRTAB) + VG_(printf)("\n"); + } + } + + if (SHOW) + VG_(printf)("\n"); + return True; + +#undef BAD +} + + +static ULong ascii_to_ULong ( void* vbuf, Int nbuf ) +{ + Int i; + UChar c; + UChar* buf = (UChar*)vbuf; + ULong n = 0; + for (i = 0; i < nbuf; i++) { + c = buf[i]; + if (c >= '0' && c <= '9') + n = 10ULL * n + (ULong)(c - '0'); + } + return n; +} + + +/* Returns True on success, False if any kind of problem. */ +static +Bool read_xcoff_o_or_a ( /*MOD*/struct _DebugInfo* di, + HChar* a_name, HChar* o_name ) +{ + UChar* image = NULL; + Word n_image = 0; + Bool ok; + Int i; + SysRes sr, fd; + + struct vg_stat stat_buf; + + vg_assert(o_name); + + if (a_name == NULL) { + /* This is just a plain XCOFF object file. */ + + sr = VG_(stat)( o_name, &stat_buf ); + if (sr.isError) { + ML_(symerr)(di, True, "can't stat XCOFF object file"); + return False; + } + + n_image = stat_buf.st_size; + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("XCOFF object file size %ld\n", n_image); + if (n_image <= 0) { + ML_(symerr)(di, True, "implausible XCOFF object file size"); + return False; + } + + fd = VG_(open)( o_name, VKI_O_RDONLY, 0 ); + if (fd.isError) { + ML_(symerr)(di, True, "can't open XCOFF object file"); + return False; + } + + sr = VG_(am_mmap_file_float_valgrind)(n_image, VKI_PROT_READ, + fd.res, 0); + VG_(close)(fd.res); + + if (sr.isError) { + ML_(symerr)(di, True, "can't mmap XCOFF object file"); + return False; + } + + image = (UChar*)sr.res; + ok = read_xcoff_mapped_object( di, image, n_image ); + VG_(am_munmap_valgrind)( (Addr)image, n_image); + + /* assert OK */ + return ok; + + } else { + + /* It's an XCOFF .a file ("ar file format, large"). Map the + whole thing in, find the member specified by O_NAME, and read + symbols from that. */ + + sr = VG_(stat)( a_name, &stat_buf ); + if (sr.isError) { + ML_(symerr)(di, True, "can't stat XCOFF archive file"); + return False; + } + + n_image = stat_buf.st_size; + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("XCOFF archive file size %ld\n", n_image); + if (n_image <= 0) { + ML_(symerr)(di, True, "implausible XCOFF archive file size"); + return False; + } + + fd = VG_(open)( a_name, VKI_O_RDONLY, 0 ); + if (fd.isError) { + ML_(symerr)(di, True, "can't open XCOFF archive file"); + return False; + } + + sr = VG_(am_mmap_file_float_valgrind)(n_image, VKI_PROT_READ, + fd.res, 0); + VG_(close)(fd.res); + + if (sr.isError) { + ML_(symerr)(di, True, "can't mmap XCOFF archive file"); + return False; + } + + image = (UChar*)sr.res; + ok = False; + + /* Right. Let's go looking for the requested object. First, + peer at the archive's fixed header. */ + + if (n_image < sizeof(FL_HDR)) { + ML_(symerr)(di, True, "XCOFF archive too small for fixed header"); + goto done; + } + + FL_HDR* fl_hdr = (FL_HDR*)image; + if (SHOW && SHOW_AR_DETAILS) { + VG_(printf)("magic: %s\n", fl_hdr->fl_magic); + VG_(printf)("memoff: %s\n", fl_hdr->fl_memoff); + VG_(printf)("gstoff: %s\n", fl_hdr->fl_gstoff); + VG_(printf)("gst64off: %s\n", fl_hdr->fl_gst64off); + } + + { UChar* s = (UChar*)&fl_hdr->fl_magic; + if (s[0] == '<' && s[1] == 'b' && s[2] == 'i' + && s[3] == 'g' && s[4] == 'a' && s[5] == 'f' + && s[6] == '>' && s[7] == '\n') { + /* ok */ + } else { + ML_(symerr)(di, True, + "Is not XCOFF 'big'-variant .a format archive"); + goto done; + } + } + + /* Get a pointer to the member table entry. */ + UChar* mtabC = image + ascii_to_ULong(&fl_hdr->fl_memoff, + sizeof(fl_hdr->fl_memoff)); + AR_HDR* mt_hdr = (AR_HDR*)mtabC; + + if (mtabC < image || mtabC + sizeof(AR_HDR) > image + n_image) { + ML_(symerr)(di, True, + "XCOFF archive member table header exceeds image"); + goto done; + } + + /* should be: backquote newline */ + if (mt_hdr->_ar_name.ar_name[0] != 0x60 /* backquote */ + || mt_hdr->_ar_name.ar_name[1] != 0x0A /* \n */) { + ML_(symerr)(di, True, + "XCOFF archive member table header is invalid"); + goto done; + } + + if (SHOW) { + VG_(printf)("member table ar_size = %lld\n", + ascii_to_ULong(&mt_hdr->ar_size,20)); + VG_(printf)("member table ar_namlen = %lld\n", + ascii_to_ULong(&mt_hdr->ar_namlen,4)); + } + + if (mtabC < image + || mtabC + sizeof(AR_HDR) + + ascii_to_ULong(&mt_hdr->ar_size, 20) + > image + n_image) { + ML_(symerr)(di, True, "XCOFF archive member table exceeds image"); + goto done; + } + + UChar* data = mtabC + sizeof(AR_HDR) + + ascii_to_ULong(&mt_hdr->ar_namlen,4); + /* ALIGN */ + if ( ((UWord)data) & 1 ) data++; + if (SHOW) + VG_(printf)("member table data = %p\n", data); + + UInt nmembers = ascii_to_ULong(data, 20); + if (SHOW) + VG_(printf)("member table contains %d entries\n", nmembers); + for (i = 0; i < nmembers; i++) { + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)(" %d has off %d\n", + i, (Int)ascii_to_ULong(data + 20 + 20*i, 20)); + } + + UChar* p = data + 20 + 20*nmembers; + + for (i = 0; i < nmembers; i++) { + + if (0 != VG_(strcmp)(p, o_name)) + goto move_on; + + UInt objoff = ascii_to_ULong(data + 20 + 20*i, 20); + + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("got offset = %u\n", objoff); + + vg_assert(ok == False); + + /* Sanity check the selected member */ + UChar* o_hdrC = image + objoff; + if (o_hdrC + sizeof(AR_HDR) >= image + n_image) { + ML_(symerr)(di, True, + "XCOFF archive member header exceeds image"); + goto done; + } + AR_HDR* o_hdr = (AR_HDR*)o_hdrC; + UWord o_size = (UWord)ascii_to_ULong(&o_hdr->ar_size, 20); + UChar* o_data = o_hdrC + sizeof(AR_HDR) + + (UWord)ascii_to_ULong(&o_hdr->ar_namlen,4); + + /* ALIGN */ + if ( ((UWord)o_data) & 1 ) o_data++; + + if (SHOW) + VG_(printf)("member data = %p, size = %ld\n", o_data, o_size); + + if (!(o_data >= image && o_data + o_size <= image + n_image)) { + ML_(symerr)(di, True, + "XCOFF archive member exceeds image"); + goto done; + } + + if (o_size < sizeof(FILHDR)) { + ML_(symerr)(di, True, + "XCOFF object file header is implausibly small (1)"); + goto done; + } + + /* It's the right name, but need to also check the magic + number, since some archives contain both a 32-bit and + 64-bit version of the same object. */ + FILHDR* t_filhdr = (FILHDR*)o_data; +# if defined(VGP_ppc32_aix5) + if (t_filhdr->f_magic == 0x01F7 /* XCOFF64 */) { + if (0) + VG_(printf)("Skipping 64-bit archive on 32-bit platform\n"); + goto move_on; + } +# elif defined(VGP_ppc64_aix5) + if (t_filhdr->f_magic == 0x01DF /* XCOFF32 */) { + if (0) + VG_(printf)("Skipping 32-bit archive on 64-bit platform\n"); + goto move_on; + } +# endif + + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("\nimage: %p-%p object: %p-%p\n\n", + image, image+n_image-1, o_data, o_data+o_size-1); + ok = read_xcoff_mapped_object( di, o_data, o_size ); + goto done; + + vg_assert(0); + /* NOTREACHED */ + + move_on: + while (*p) { + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("%c", *p); + p++; + } + if (SHOW && SHOW_AR_DETAILS) + VG_(printf)("\n"); + p++; + } + + vg_assert(i == nmembers); + ML_(symerr)(di, True, "can't find object in XCOFF archive file"); + + done: + if (image) { + VG_(am_munmap_valgrind)( (Addr)image, n_image ); + /* assert munmap succeeded */ + } + return ok; + + } +} + + +/* Main entry point for XCOFF reading. The following di fields must + be filled in by the caller: + + filename + memname (optional) + text_avma, text_size + data_avma, data_size + + and all other fields should be zeroed. +*/ +Bool ML_(read_xcoff_debug_info) ( struct _DebugInfo* di, + Bool is_mainexe ) +{ + Bool ok; + + if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) { + if (di->memname) { + VG_(message)(Vg_DebugMsg, "Reading syms from %s(%s) (%#lx)", + di->filename, di->memname, di->text_avma); + } else { + VG_(message)(Vg_DebugMsg, "Reading syms from %s (%#lx)", + di->filename, di->text_avma); + } + } + + if (SHOW) { + VG_(printf)("------------------- BEGIN read xcoff ------------------\n"); + VG_(printf)("--- file: %s\n", di->filename); + VG_(printf)("--- mem: %s\n", di->memname ? di->memname + : (UChar*)"(none)" ); + VG_(printf)("--- t actual vma: %#lx\n", di->text_avma); + VG_(printf)("--- t actual len: %ld\n", di->text_size); + VG_(printf)("--- d actual vma: %#lx\n", di->data_avma); + VG_(printf)("--- d actual len: %ld\n", di->data_size); + } + + if (di->memname) { + /* XCOFF .a file. di->filename is its name, di->memname is the + name of the required .o within it. */ + ok = read_xcoff_o_or_a( di, di->filename, di->memname ); + } else { + /* no archive member name, so di->filename is an XCOFF object */ + ok = read_xcoff_o_or_a( di, NULL, di->filename ); + } + + di->soname = NULL; + if (ok) { + if (is_mainexe) { + di->soname = "NONE"; + } else { + UChar* p = VG_(strrchr)(di->filename, '/'); + p = p ? p+1 : di->filename; + /* p points at the main filename */ + if (di->memname) { + /* set the soname to "archive.a(member.o)" */ + Int nbytes = VG_(strlen)(p) + 1 + VG_(strlen)(di->memname) + 1 + 1; + UChar* so = ML_(dinfo_zalloc)("di.readxcoff.rxdi.1", nbytes); + vg_assert(so); + VG_(sprintf)(so, "%s(%s)", p, di->memname); + vg_assert(VG_(strlen)(so) == nbytes-1); + di->soname = so; + } else { + /* no member name, hence soname = "archive.a" */ + di->soname = ML_(dinfo_strdup)("di.readxcoff.rxdi.2", p); + } + } + if (SHOW) + VG_(printf)("Setting soname to %s\n", di->soname); + } + + if (SHOW) + VG_(printf)("------------------- END read xcoff ------------------\n\n"); + + return ok; +} + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ |