Diffstat (limited to 'helgrind/libhb_core.c')
-rw-r--r--  helgrind/libhb_core.c  5011
1 file changed, 5011 insertions, 0 deletions
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c
new file mode 100644
index 0000000..572b26b
--- /dev/null
+++ b/helgrind/libhb_core.c
@@ -0,0 +1,5011 @@
+
+/*--------------------------------------------------------------------*/
+/*--- LibHB: a library for implementing and checking ---*/
+/*--- the happens-before relationship in concurrent programs. ---*/
+/*---                                                  libhb_core.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of LibHB, a library for implementing and checking
+ the happens-before relationship in concurrent programs.
+
+ Copyright (C) 2008-2009 OpenWorks Ltd
+ info@open-works.co.uk
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_wordfm.h"
+#include "pub_tool_sparsewa.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_oset.h"
+#include "pub_tool_threadstate.h"
+#include "pub_tool_aspacemgr.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_errormgr.h"
+#include "pub_tool_options.h" // VG_(clo_verbosity)
+#include "hg_basics.h"
+#include "hg_wordset.h"
+#include "hg_lock_n_thread.h"
+#include "hg_errors.h"
+
+#include "libhb.h"
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// Debugging #defines //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/* Check the sanity of shadow values in the core memory state
+ machine. Change #if 0 to #if 1 to enable this. */
+#if 0
+# define CHECK_MSM 1
+#else
+# define CHECK_MSM 0
+#endif
+
+
+/* Check sanity (reference counts, etc) in the conflicting access
+ machinery. Change #if 0 to #if 1 to enable this. */
+#if 0
+# define CHECK_CEM 1
+#else
+# define CHECK_CEM 0
+#endif
+
+
+/* Check sanity in the compressed shadow memory machinery,
+ particularly in its caching innards. Unfortunately there's no
+ almost-zero-cost way to make them selectable at run time. Hence
+ set the #if 0 to #if 1 and rebuild if you want them. */
+#if 0
+# define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
+# define inline __attribute__((noinline))
+ /* probably want to ditch -fomit-frame-pointer too */
+#else
+# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
+#endif
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// Forward declarations //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/* Globals needed by other parts of the library.  These are set
+   once at startup and then never changed. */
+static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
+static ExeContext* (*main_get_EC)( Thr* ) = NULL;
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN compressed shadow memory //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+#ifndef __HB_ZSM_H
+#define __HB_ZSM_H
+
+typedef ULong SVal;
+
+/* This value has special significance to the implementation, and callers
+ may not store it in the shadow memory. */
+#define SVal_INVALID (3ULL << 62)
+
+/* This is the default value for shadow memory. Initially the shadow
+ memory contains no accessible areas and so all reads produce this
+   value.  TODO: make this caller-definable. */
+#define SVal_NOACCESS (2ULL << 62)
+
+/* Initialise the library. Once initialised, it will (or may) call
+ rcinc and rcdec in response to all the calls below, in order to
+ allow the user to do reference counting on the SVals stored herein.
+ It is important to understand, however, that due to internal
+ caching, the reference counts are in general inaccurate, and can be
+ both above or below the true reference count for an item. In
+ particular, the library may indicate that the reference count for
+ an item is zero, when in fact it is not.
+
+ To make the reference counting exact and therefore non-pointless,
+ call zsm_flush_cache. Immediately after it returns, the reference
+ counts for all items, as deduced by the caller by observing calls
+ to rcinc and rcdec, will be correct, and so any items with a zero
+ reference count may be freed (or at least considered to be
+ unreferenced by this library).
+*/
+static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
+
+static void zsm_set_range ( Addr, SizeT, SVal );
+static SVal zsm_read8 ( Addr );
+static void zsm_copy_range ( Addr, Addr, SizeT );
+static void zsm_flush_cache ( void );
+
+#endif /* ! __HB_ZSM_H */
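+
+/* A minimal sketch of the intended calling sequence, for a
+   hypothetical client that does not need reference counting.  The
+   example_* names below are illustrative only and are not used
+   elsewhere in the library. */
+__attribute__((unused))
+static void example_rcinc ( SVal sv ) {
+   (void)sv; /* a real client would bump a per-SVal reference count */
+}
+__attribute__((unused))
+static void example_rcdec ( SVal sv ) {
+   (void)sv; /* ... and drop it here */
+}
+__attribute__((unused))
+static void example_zsm_usage ( void )
+{
+   zsm_init( example_rcinc, example_rcdec );
+   /* mark one (arbitrary, illustrative) page as holding no
+      accessible data */
+   zsm_set_range( 0x5000, 4096, SVal_NOACCESS );
+   /* after a flush, the rcinc/rcdec totals observed so far are exact */
+   zsm_flush_cache();
+}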
+
+
+/* Round a up to the next multiple of N.  N must be a power of 2 */
+#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
+/* Round a down to the next multiple of N.  N must be a power of 2 */
+#define ROUNDDN(a, N)   ((a) & ~((N)-1))
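+
+/* For example, ROUNDUP(0x1003, 16) == 0x1010 and
+   ROUNDDN(0x1003, 16) == 0x1000. */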
+
+
+
+/* ------ User-supplied RC functions ------ */
+static void(*rcinc)(SVal) = NULL;
+static void(*rcdec)(SVal) = NULL;
+
+
+/* ------ CacheLine ------ */
+
+#define N_LINE_BITS 6 /* must be >= 3 */
+#define N_LINE_ARANGE (1 << N_LINE_BITS)
+#define N_LINE_TREES (N_LINE_ARANGE >> 3)
+
+typedef
+ struct {
+ UShort descrs[N_LINE_TREES];
+ SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
+ }
+ CacheLine;
+
+#define TREE_DESCR_16_0 (1<<0)
+#define TREE_DESCR_32_0 (1<<1)
+#define TREE_DESCR_16_1 (1<<2)
+#define TREE_DESCR_64 (1<<3)
+#define TREE_DESCR_16_2 (1<<4)
+#define TREE_DESCR_32_1 (1<<5)
+#define TREE_DESCR_16_3 (1<<6)
+#define TREE_DESCR_8_0 (1<<7)
+#define TREE_DESCR_8_1 (1<<8)
+#define TREE_DESCR_8_2 (1<<9)
+#define TREE_DESCR_8_3 (1<<10)
+#define TREE_DESCR_8_4 (1<<11)
+#define TREE_DESCR_8_5 (1<<12)
+#define TREE_DESCR_8_6 (1<<13)
+#define TREE_DESCR_8_7 (1<<14)
+#define TREE_DESCR_DTY (1<<15)
+
+typedef
+ struct {
+ SVal dict[4]; /* can represent up to 4 diff values in the line */
+ UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
+ dict indexes */
+ /* if dict[0] == SVal_INVALID then dict[1] is the index of the
+ LineF to use, and dict[2..] are also SVal_INVALID. */
+ }
+ LineZ; /* compressed rep for a cache line */
+
+typedef
+ struct {
+ Bool inUse;
+ SVal w64s[N_LINE_ARANGE];
+ }
+ LineF; /* full rep for a cache line */
+
+/* Shadow memory.
+ Primary map is a WordFM Addr SecMap*.
+ SecMaps cover some page-size-ish section of address space and hold
+ a compressed representation.
+ CacheLine-sized chunks of SecMaps are copied into a Cache, being
+ decompressed when moved into the cache and recompressed on the
+ way out. Because of this, the cache must operate as a writeback
+ cache, not a writethrough one.
+
+ Each SecMap must hold a power-of-2 number of CacheLines. Hence
+   N_SECMAP_BITS must be >= N_LINE_BITS.
+*/
+#define N_SECMAP_BITS 13
+#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
+
+// # CacheLines held by a SecMap
+#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
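+
+/* For example, with N_SECMAP_BITS == 13 and N_LINE_BITS == 6, a SecMap
+   covers 8192 bytes of address space using 128 LineZs, and each LineZ
+   carries 4*8 + 64/4 == 48 bytes of payload per 64 application bytes,
+   provided the line contains at most 4 distinct shadow values. */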
+
+/* The data in the SecMap is held in the array of LineZs. Each LineZ
+ either carries the required data directly, in a compressed
+ representation, or it holds (in .dict[0]) an index to the LineF in
+ .linesF that holds the full representation.
+
+ Currently-unused LineF's have their .inUse bit set to zero.
+   Since each in-use LineF is referred to by exactly one LineZ,
+ the number of .linesZ[] that refer to .linesF should equal
+ the number of .linesF[] that have .inUse == True.
+
+ RC obligations: the RCs presented to the user include exactly
+ the values in:
+ * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
+ * F reps that are in use (.inUse == True)
+
+ Hence the following actions at the following transitions are required:
+
+ F rep: .inUse==True -> .inUse==False -- rcdec_LineF
+ F rep: .inUse==False -> .inUse==True -- rcinc_LineF
+ Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
+ Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
+*/
+typedef
+ struct {
+ UInt magic;
+ LineZ linesZ[N_SECMAP_ZLINES];
+ LineF* linesF;
+ UInt linesF_size;
+ }
+ SecMap;
+
+#define SecMap_MAGIC 0x571e58cbU
+
+static inline Bool is_sane_SecMap ( SecMap* sm ) {
+ return sm != NULL && sm->magic == SecMap_MAGIC;
+}
+
+/* ------ Cache ------ */
+
+#define N_WAY_BITS 16
+#define N_WAY_NENT (1 << N_WAY_BITS)
+
+/* Each tag is the address of the associated CacheLine, rounded down
+   to a CacheLine address boundary.  The CacheLine size must be a power
+   of 2 and at least 8.  Hence an easy way to initialise the cache to an
+   empty state is to set all the tag values to any value that is not a
+   multiple of 8, eg 1, so that all queries initially miss.  It does
+   however require us to detect, and not write back, any line with a
+   bogus tag. */
+typedef
+ struct {
+ CacheLine lyns0[N_WAY_NENT];
+ Addr tags0[N_WAY_NENT];
+ }
+ Cache;
+
+static inline Bool is_valid_scache_tag ( Addr tag ) {
+ /* a valid tag should be naturally aligned to the start of
+ a CacheLine. */
+ return 0 == (tag & (N_LINE_ARANGE - 1));
+}
+
+
+/* --------- Primary data structures --------- */
+
+/* Shadow memory primary map */
+static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
+static Cache cache_shmem;
+
+
+static UWord stats__secmaps_search = 0; // # SM finds
+static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
+static UWord stats__secmaps_allocd = 0; // # SecMaps issued
+static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
+static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
+static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
+static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
+static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
+static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
+static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
+static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
+static UWord stats__cache_F_fetches = 0; // # F lines fetched
+static UWord stats__cache_F_wbacks = 0; // # F lines written back
+static UWord stats__cache_invals = 0; // # cache invals
+static UWord stats__cache_flushes = 0; // # cache flushes
+static UWord stats__cache_totrefs = 0; // # total accesses
+static UWord stats__cache_totmisses = 0; // # misses
+static ULong stats__cache_make_New_arange = 0; // total arange made New
+static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
+static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
+static UWord stats__cline_read64s = 0; // # calls to s_m_read64
+static UWord stats__cline_read32s = 0; // # calls to s_m_read32
+static UWord stats__cline_read16s = 0; // # calls to s_m_read16
+static UWord stats__cline_read8s = 0; // # calls to s_m_read8
+static UWord stats__cline_write64s = 0; // # calls to s_m_write64
+static UWord stats__cline_write32s = 0; // # calls to s_m_write32
+static UWord stats__cline_write16s = 0; // # calls to s_m_write16
+static UWord stats__cline_write8s = 0; // # calls to s_m_write8
+static UWord stats__cline_set64s = 0; // # calls to s_m_set64
+static UWord stats__cline_set32s = 0; // # calls to s_m_set32
+static UWord stats__cline_set16s = 0; // # calls to s_m_set16
+static UWord stats__cline_set8s = 0; // # calls to s_m_set8
+static UWord stats__cline_get8s = 0; // # calls to s_m_get8
+static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
+static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
+static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
+static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
+static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
+static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
+static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
+
+static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
+ return a & ~(N_SECMAP_ARANGE - 1);
+}
+static inline UWord shmem__get_SecMap_offset ( Addr a ) {
+ return a & (N_SECMAP_ARANGE - 1);
+}
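+
+/* For example, with N_SECMAP_ARANGE == 8192 (0x2000), address 0x40367A
+   has SecMap base 0x402000 and SecMap offset 0x167A. */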
+
+
+/*----------------------------------------------------------------*/
+/*--- map_shmem :: WordFM Addr SecMap ---*/
+/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
+/*----------------------------------------------------------------*/
+
+/*--------------- SecMap allocation --------------- */
+
+static HChar* shmem__bigchunk_next = NULL;
+static HChar* shmem__bigchunk_end1 = NULL;
+
+static void* shmem__bigchunk_alloc ( SizeT n )
+{
+ const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
+ tl_assert(n > 0);
+ n = VG_ROUNDUP(n, 16);
+ tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
+ tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
+ <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
+ if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
+ if (0)
+ VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
+ (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
+ shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
+ if (shmem__bigchunk_next == NULL)
+ VG_(out_of_memory_NORETURN)(
+ "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
+ shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
+ }
+ tl_assert(shmem__bigchunk_next);
+ tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
+ tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
+ shmem__bigchunk_next += n;
+ return shmem__bigchunk_next - n;
+}
+
+static SecMap* shmem__alloc_SecMap ( void )
+{
+ Word i, j;
+ SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
+ if (0) VG_(printf)("alloc_SecMap %p\n",sm);
+ tl_assert(sm);
+ sm->magic = SecMap_MAGIC;
+ for (i = 0; i < N_SECMAP_ZLINES; i++) {
+ sm->linesZ[i].dict[0] = SVal_NOACCESS;
+ sm->linesZ[i].dict[1] = SVal_INVALID;
+ sm->linesZ[i].dict[2] = SVal_INVALID;
+ sm->linesZ[i].dict[3] = SVal_INVALID;
+ for (j = 0; j < N_LINE_ARANGE/4; j++)
+ sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
+ }
+ sm->linesF = NULL;
+ sm->linesF_size = 0;
+ stats__secmaps_allocd++;
+ stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
+ stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
+ stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
+ return sm;
+}
+
+typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
+static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
+
+static SecMap* shmem__find_SecMap ( Addr ga )
+{
+ SecMap* sm = NULL;
+ Addr gaKey = shmem__round_to_SecMap_base(ga);
+ // Cache
+ stats__secmaps_search++;
+ if (LIKELY(gaKey == smCache[0].gaKey))
+ return smCache[0].sm;
+ if (LIKELY(gaKey == smCache[1].gaKey)) {
+ SMCacheEnt tmp = smCache[0];
+ smCache[0] = smCache[1];
+ smCache[1] = tmp;
+ return smCache[0].sm;
+ }
+ if (gaKey == smCache[2].gaKey) {
+ SMCacheEnt tmp = smCache[1];
+ smCache[1] = smCache[2];
+ smCache[2] = tmp;
+ return smCache[1].sm;
+ }
+ // end Cache
+ stats__secmaps_search_slow++;
+ if (VG_(lookupFM)( map_shmem,
+ NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
+ tl_assert(sm != NULL);
+ smCache[2] = smCache[1];
+ smCache[1] = smCache[0];
+ smCache[0].gaKey = gaKey;
+ smCache[0].sm = sm;
+ } else {
+ tl_assert(sm == NULL);
+ }
+ return sm;
+}
+
+static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
+{
+ SecMap* sm = shmem__find_SecMap ( ga );
+ if (LIKELY(sm)) {
+ return sm;
+ } else {
+ /* create a new one */
+ Addr gaKey = shmem__round_to_SecMap_base(ga);
+ sm = shmem__alloc_SecMap();
+ tl_assert(sm);
+ VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
+ return sm;
+ }
+}
+
+
+/* ------------ LineF and LineZ related ------------ */
+
+static void rcinc_LineF ( LineF* lineF ) {
+ UWord i;
+ tl_assert(lineF->inUse);
+ for (i = 0; i < N_LINE_ARANGE; i++)
+ rcinc(lineF->w64s[i]);
+}
+
+static void rcdec_LineF ( LineF* lineF ) {
+ UWord i;
+ tl_assert(lineF->inUse);
+ for (i = 0; i < N_LINE_ARANGE; i++)
+ rcdec(lineF->w64s[i]);
+}
+
+static void rcinc_LineZ ( LineZ* lineZ ) {
+ tl_assert(lineZ->dict[0] != SVal_INVALID);
+ rcinc(lineZ->dict[0]);
+ if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
+ if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
+ if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
+}
+
+static void rcdec_LineZ ( LineZ* lineZ ) {
+ tl_assert(lineZ->dict[0] != SVal_INVALID);
+ rcdec(lineZ->dict[0]);
+ if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
+ if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
+ if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
+}
+
+inline
+static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
+ Word bix, shft, mask, prep;
+ tl_assert(ix >= 0);
+ bix = ix >> 2;
+ shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
+ mask = 3 << shft;
+ prep = b2 << shft;
+ arr[bix] = (arr[bix] & ~mask) | prep;
+}
+
+inline
+static UWord read_twobit_array ( UChar* arr, UWord ix ) {
+ Word bix, shft;
+ tl_assert(ix >= 0);
+ bix = ix >> 2;
+ shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
+ return (arr[bix] >> shft) & 3;
+}
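+
+/* A minimal sketch (not used by the library itself) of how one byte's
+   shadow value is recovered from a Z rep: the byte's 2-bit index in
+   .ix2s selects one of the up-to-4 values in .dict.  This assumes the
+   line really is in the compressed representation. */
+__attribute__((unused))
+static SVal example_lineZ_read8 ( LineZ* lineZ, UWord byteOff ) {
+   UWord ix;
+   tl_assert(byteOff < N_LINE_ARANGE);
+   tl_assert(lineZ->dict[0] != SVal_INVALID); /* else it's an F rep */
+   ix = read_twobit_array( lineZ->ix2s, byteOff );
+   return lineZ->dict[ix];
+}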
+
+/* Given address 'tag', find either the Z or F line containing relevant
+ data, so it can be read into the cache.
+*/
+static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
+ /*OUT*/LineF** fp, Addr tag ) {
+ LineZ* lineZ;
+ LineF* lineF;
+ UWord zix;
+ SecMap* sm = shmem__find_or_alloc_SecMap(tag);
+ UWord smoff = shmem__get_SecMap_offset(tag);
+ /* since smoff is derived from a valid tag, it should be
+ cacheline-aligned. */
+ tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
+ zix = smoff >> N_LINE_BITS;
+ tl_assert(zix < N_SECMAP_ZLINES);
+ lineZ = &sm->linesZ[zix];
+ lineF = NULL;
+ if (lineZ->dict[0] == SVal_INVALID) {
+ UInt fix = (UInt)lineZ->dict[1];
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < sm->linesF_size);
+ lineF = &sm->linesF[fix];
+ tl_assert(lineF->inUse);
+ lineZ = NULL;
+ }
+ *zp = lineZ;
+ *fp = lineF;
+}
+
+/* Given address 'tag', return the relevant SecMap and the index of
+ the LineZ within it, in the expectation that the line is to be
+   overwritten.  Regardless of whether 'tag' is currently associated
+   with a Z or F representation, the current representation is
+   rcdec'd, in recognition of the fact that its contents are just
+   about to be overwritten. */
+static __attribute__((noinline))
+void find_Z_for_writing ( /*OUT*/SecMap** smp,
+ /*OUT*/Word* zixp,
+ Addr tag ) {
+ LineZ* lineZ;
+ LineF* lineF;
+ UWord zix;
+ SecMap* sm = shmem__find_or_alloc_SecMap(tag);
+ UWord smoff = shmem__get_SecMap_offset(tag);
+ /* since smoff is derived from a valid tag, it should be
+ cacheline-aligned. */
+ tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
+ zix = smoff >> N_LINE_BITS;
+ tl_assert(zix < N_SECMAP_ZLINES);
+ lineZ = &sm->linesZ[zix];
+ lineF = NULL;
+ /* re RCs, we are freeing up this LineZ/LineF so that new data can
+ be parked in it. Hence have to rcdec it accordingly. */
+ /* If lineZ has an associated lineF, free it up. */
+ if (lineZ->dict[0] == SVal_INVALID) {
+ UInt fix = (UInt)lineZ->dict[1];
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < sm->linesF_size);
+ lineF = &sm->linesF[fix];
+ tl_assert(lineF->inUse);
+ rcdec_LineF(lineF);
+ lineF->inUse = False;
+ } else {
+ rcdec_LineZ(lineZ);
+ }
+ *smp = sm;
+ *zixp = zix;
+}
+
+static __attribute__((noinline))
+void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
+ UInt i, new_size;
+ LineF* nyu;
+
+ if (sm->linesF) {
+ tl_assert(sm->linesF_size > 0);
+ } else {
+ tl_assert(sm->linesF_size == 0);
+ }
+
+ if (sm->linesF) {
+ for (i = 0; i < sm->linesF_size; i++) {
+ if (!sm->linesF[i].inUse) {
+ *fixp = (Word)i;
+ return;
+ }
+ }
+ }
+
+ /* No free F line found. Expand existing array and try again. */
+ new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
+ nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
+ new_size * sizeof(LineF) );
+ tl_assert(nyu);
+
+ stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
+ stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
+ * sizeof(LineF);
+
+ if (0)
+ VG_(printf)("SM %p: expand F array from %d to %d\n",
+ sm, (Int)sm->linesF_size, new_size);
+
+ for (i = 0; i < new_size; i++)
+ nyu[i].inUse = False;
+
+ if (sm->linesF) {
+ for (i = 0; i < sm->linesF_size; i++) {
+ tl_assert(sm->linesF[i].inUse);
+ nyu[i] = sm->linesF[i];
+ }
+ VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
+ HG_(free)(sm->linesF);
+ }
+
+ sm->linesF = nyu;
+ sm->linesF_size = new_size;
+
+ for (i = 0; i < sm->linesF_size; i++) {
+ if (!sm->linesF[i].inUse) {
+ *fixp = (Word)i;
+ return;
+ }
+ }
+
+ /*NOTREACHED*/
+ tl_assert(0);
+}
+
+
+/* ------------ CacheLine and implicit-tree related ------------ */
+
+__attribute__((unused))
+static void pp_CacheLine ( CacheLine* cl ) {
+ Word i;
+ if (!cl) {
+ VG_(printf)("%s","pp_CacheLine(NULL)\n");
+ return;
+ }
+ for (i = 0; i < N_LINE_TREES; i++)
+ VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
+ for (i = 0; i < N_LINE_ARANGE; i++)
+ VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
+}
+
+static UChar descr_to_validbits ( UShort descr )
+{
+ /* a.k.a Party Time for gcc's constant folder */
+# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
+ b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
+ ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
+ ( (b8_5) << 12) | ( (b8_4) << 11) | \
+ ( (b8_3) << 10) | ( (b8_2) << 9) | \
+ ( (b8_1) << 8) | ( (b8_0) << 7) | \
+ ( (b16_3) << 6) | ( (b32_1) << 5) | \
+ ( (b16_2) << 4) | ( (b64) << 3) | \
+ ( (b16_1) << 2) | ( (b32_0) << 1) | \
+ ( (b16_0) << 0) ) )
+
+# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
+ ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
+ ( (bit5) << 5) | ( (bit4) << 4) | \
+ ( (bit3) << 3) | ( (bit2) << 2) | \
+ ( (bit1) << 1) | ( (bit0) << 0) ) )
+
+ /* these should all get folded out at compile time */
+ tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
+ tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
+ tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
+
+ switch (descr) {
+ /*
+ +--------------------------------- TREE_DESCR_8_7
+ | +------------------- TREE_DESCR_8_0
+ | | +---------------- TREE_DESCR_16_3
+ | | | +-------------- TREE_DESCR_32_1
+ | | | | +------------ TREE_DESCR_16_2
+ | | | | | +--------- TREE_DESCR_64
+ | | | | | | +------ TREE_DESCR_16_1
+ | | | | | | | +---- TREE_DESCR_32_0
+ | | | | | | | | +-- TREE_DESCR_16_0
+ | | | | | | | | |
+ | | | | | | | | | GRANULARITY, 7 -> 0 */
+ case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
+ return BYTE(1,1,1,1,1,1,1,1);
+ case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
+ return BYTE(1,1,0,1,1,1,1,1);
+ case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
+ return BYTE(0,1,1,1,1,1,1,1);
+ case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
+ return BYTE(0,1,0,1,1,1,1,1);
+
+ case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
+ return BYTE(1,1,1,1,1,1,0,1);
+ case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
+ return BYTE(1,1,0,1,1,1,0,1);
+ case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
+ return BYTE(0,1,1,1,1,1,0,1);
+ case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
+ return BYTE(0,1,0,1,1,1,0,1);
+
+ case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
+ return BYTE(1,1,1,1,0,1,1,1);
+ case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
+ return BYTE(1,1,0,1,0,1,1,1);
+ case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
+ return BYTE(0,1,1,1,0,1,1,1);
+ case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
+ return BYTE(0,1,0,1,0,1,1,1);
+
+ case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
+ return BYTE(1,1,1,1,0,1,0,1);
+ case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
+ return BYTE(1,1,0,1,0,1,0,1);
+ case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
+ return BYTE(0,1,1,1,0,1,0,1);
+ case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
+ return BYTE(0,1,0,1,0,1,0,1);
+
+ case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
+ return BYTE(0,0,0,1,1,1,1,1);
+ case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
+ return BYTE(0,0,0,1,1,1,0,1);
+ case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
+ return BYTE(0,0,0,1,0,1,1,1);
+ case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
+ return BYTE(0,0,0,1,0,1,0,1);
+
+ case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
+ return BYTE(1,1,1,1,0,0,0,1);
+ case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
+ return BYTE(1,1,0,1,0,0,0,1);
+ case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
+ return BYTE(0,1,1,1,0,0,0,1);
+ case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
+ return BYTE(0,1,0,1,0,0,0,1);
+
+ case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
+ return BYTE(0,0,0,1,0,0,0,1);
+
+ case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
+ return BYTE(0,0,0,0,0,0,0,1);
+
+ default: return BYTE(0,0,0,0,0,0,0,0);
+ /* INVALID - any valid descr produces at least one
+ valid bit in tree[0..7]*/
+ }
+ /* NOTREACHED*/
+ tl_assert(0);
+
+# undef DESCR
+# undef BYTE
+}
+
+__attribute__((unused))
+static Bool is_sane_Descr ( UShort descr ) {
+ return descr_to_validbits(descr) != 0;
+}
+
+static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
+ VG_(sprintf)(dst,
+ "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
+ (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_64) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
+ (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
+ );
+}
+static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
+ VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
+ (Int)((byte & 128) ? 1 : 0),
+ (Int)((byte & 64) ? 1 : 0),
+ (Int)((byte & 32) ? 1 : 0),
+ (Int)((byte & 16) ? 1 : 0),
+ (Int)((byte & 8) ? 1 : 0),
+ (Int)((byte & 4) ? 1 : 0),
+ (Int)((byte & 2) ? 1 : 0),
+ (Int)((byte & 1) ? 1 : 0)
+ );
+}
+
+static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
+ Word i;
+ UChar validbits = descr_to_validbits(descr);
+ HChar buf[128], buf2[128];
+ if (validbits == 0)
+ goto bad;
+ for (i = 0; i < 8; i++) {
+ if (validbits & (1<<i)) {
+ if (tree[i] == SVal_INVALID)
+ goto bad;
+ } else {
+ if (tree[i] != SVal_INVALID)
+ goto bad;
+ }
+ }
+ return True;
+ bad:
+ sprintf_Descr( buf, descr );
+ sprintf_Byte( buf2, validbits );
+ VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
+ VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
+ VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
+ for (i = 0; i < 8; i++)
+ VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
+ VG_(printf)("%s","}\n");
+   return False;
+}
+
+static Bool is_sane_CacheLine ( CacheLine* cl )
+{
+ Word tno, cloff;
+
+ if (!cl) goto bad;
+
+ for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
+ UShort descr = cl->descrs[tno];
+ SVal* tree = &cl->svals[cloff];
+ if (!is_sane_Descr_and_Tree(descr, tree))
+ goto bad;
+ }
+ tl_assert(cloff == N_LINE_ARANGE);
+ return True;
+ bad:
+ pp_CacheLine(cl);
+ return False;
+}
+
+static UShort normalise_tree ( /*MOD*/SVal* tree )
+{
+ UShort descr;
+ /* pre: incoming tree[0..7] does not have any invalid shvals, in
+ particular no zeroes. */
+ if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
+ || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
+ || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
+ || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
+ tl_assert(0);
+
+ descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
+ | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
+ | TREE_DESCR_8_1 | TREE_DESCR_8_0;
+ /* build 16-bit layer */
+ if (tree[1] == tree[0]) {
+ tree[1] = SVal_INVALID;
+ descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
+ descr |= TREE_DESCR_16_0;
+ }
+ if (tree[3] == tree[2]) {
+ tree[3] = SVal_INVALID;
+ descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
+ descr |= TREE_DESCR_16_1;
+ }
+ if (tree[5] == tree[4]) {
+ tree[5] = SVal_INVALID;
+ descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
+ descr |= TREE_DESCR_16_2;
+ }
+ if (tree[7] == tree[6]) {
+ tree[7] = SVal_INVALID;
+ descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
+ descr |= TREE_DESCR_16_3;
+ }
+ /* build 32-bit layer */
+ if (tree[2] == tree[0]
+ && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
+ tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
+ descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
+ descr |= TREE_DESCR_32_0;
+ }
+ if (tree[6] == tree[4]
+ && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
+ tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
+ descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
+ descr |= TREE_DESCR_32_1;
+ }
+ /* build 64-bit layer */
+ if (tree[4] == tree[0]
+ && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
+ tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
+ descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
+ descr |= TREE_DESCR_64;
+ }
+ return descr;
+}
+
+/* This takes a cacheline where all the data is at the leaves
+ (w8[..]) and builds a correctly normalised tree. */
+static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
+{
+ Word tno, cloff;
+ for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
+ SVal* tree = &cl->svals[cloff];
+ cl->descrs[tno] = normalise_tree( tree );
+ }
+ tl_assert(cloff == N_LINE_ARANGE);
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ stats__cline_normalises++;
+}
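+
+/* A minimal sketch of what normalise_tree does for one 8-byte tree:
+   when all eight leaf values are identical, the descriptor collapses
+   to a single 64-bit node, and all leaves except the root leaf become
+   SVal_INVALID.  (Not used by the library itself.) */
+__attribute__((unused))
+static void example_normalise_uniform_tree ( void )
+{
+   SVal   tree[8];
+   UShort descr;
+   Word   i;
+   for (i = 0; i < 8; i++)
+      tree[i] = SVal_NOACCESS;           /* eight identical leaves */
+   descr = normalise_tree( tree );
+   tl_assert(descr == TREE_DESCR_64);    /* one 64-bit node covers the tree */
+   tl_assert(tree[0] == SVal_NOACCESS);  /* value lives at the root leaf */
+   for (i = 1; i < 8; i++)
+      tl_assert(tree[i] == SVal_INVALID);
+}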
+
+
+typedef struct { UChar count; SVal sval; } CountedSVal;
+
+static
+void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
+ /*OUT*/Word* dstUsedP,
+ Word nDst, CacheLine* src )
+{
+ Word tno, cloff, dstUsed;
+
+ tl_assert(nDst == N_LINE_ARANGE);
+ dstUsed = 0;
+
+ for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
+ UShort descr = src->descrs[tno];
+ SVal* tree = &src->svals[cloff];
+
+ /* sequentialise the tree described by (descr,tree). */
+# define PUT(_n,_v) \
+ do { dst[dstUsed ].count = (_n); \
+ dst[dstUsed++].sval = (_v); \
+ } while (0)
+
+ /* byte 0 */
+ if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
+ if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
+ if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
+ if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
+ /* byte 1 */
+ if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
+ /* byte 2 */
+ if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
+ if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
+ /* byte 3 */
+ if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
+ /* byte 4 */
+ if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
+ if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
+ if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
+ /* byte 5 */
+ if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
+ /* byte 6 */
+ if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
+ if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
+ /* byte 7 */
+ if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
+
+# undef PUT
+ /* END sequentialise the tree described by (descr,tree). */
+
+ }
+ tl_assert(cloff == N_LINE_ARANGE);
+ tl_assert(dstUsed <= nDst);
+
+ *dstUsedP = dstUsed;
+}
+
+/* Write the cacheline 'wix' to backing store. Where it ends up
+ is determined by its tag field. */
+static __attribute__((noinline)) void cacheline_wback ( UWord wix )
+{
+ Word i, j, k, m;
+ Addr tag;
+ SecMap* sm;
+ CacheLine* cl;
+ LineZ* lineZ;
+ LineF* lineF;
+ Word zix, fix, csvalsUsed;
+ CountedSVal csvals[N_LINE_ARANGE];
+ SVal sv;
+
+ if (0)
+ VG_(printf)("scache wback line %d\n", (Int)wix);
+
+ tl_assert(wix >= 0 && wix < N_WAY_NENT);
+
+ tag = cache_shmem.tags0[wix];
+ cl = &cache_shmem.lyns0[wix];
+
+ /* The cache line may have been invalidated; if so, ignore it. */
+ if (!is_valid_scache_tag(tag))
+ return;
+
+ /* Where are we going to put it? */
+ sm = NULL;
+ lineZ = NULL;
+ lineF = NULL;
+ zix = fix = -1;
+
+ /* find the Z line to write in and rcdec it or the associated F
+ line. */
+ find_Z_for_writing( &sm, &zix, tag );
+
+ tl_assert(sm);
+ tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
+ lineZ = &sm->linesZ[zix];
+
+ /* Generate the data to be stored */
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+
+ csvalsUsed = -1;
+ sequentialise_CacheLine( csvals, &csvalsUsed,
+ N_LINE_ARANGE, cl );
+ tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
+ if (0) VG_(printf)("%lu ", csvalsUsed);
+
+ lineZ->dict[0] = lineZ->dict[1]
+ = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
+
+ /* i indexes actual shadow values, k is cursor in csvals */
+ i = 0;
+ for (k = 0; k < csvalsUsed; k++) {
+
+ sv = csvals[k].sval;
+ if (CHECK_ZSM)
+ tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
+ /* do we already have it? */
+ if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
+ if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
+ if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
+ if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
+ /* no. look for a free slot. */
+ if (CHECK_ZSM)
+ tl_assert(sv != SVal_INVALID);
+ if (lineZ->dict[0]
+ == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
+ if (lineZ->dict[1]
+ == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
+ if (lineZ->dict[2]
+ == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
+ if (lineZ->dict[3]
+ == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
+ break; /* we'll have to use the f rep */
+ dict_ok:
+ m = csvals[k].count;
+ if (m == 8) {
+ write_twobit_array( lineZ->ix2s, i+0, j );
+ write_twobit_array( lineZ->ix2s, i+1, j );
+ write_twobit_array( lineZ->ix2s, i+2, j );
+ write_twobit_array( lineZ->ix2s, i+3, j );
+ write_twobit_array( lineZ->ix2s, i+4, j );
+ write_twobit_array( lineZ->ix2s, i+5, j );
+ write_twobit_array( lineZ->ix2s, i+6, j );
+ write_twobit_array( lineZ->ix2s, i+7, j );
+ i += 8;
+ }
+ else if (m == 4) {
+ write_twobit_array( lineZ->ix2s, i+0, j );
+ write_twobit_array( lineZ->ix2s, i+1, j );
+ write_twobit_array( lineZ->ix2s, i+2, j );
+ write_twobit_array( lineZ->ix2s, i+3, j );
+ i += 4;
+ }
+ else if (m == 1) {
+ write_twobit_array( lineZ->ix2s, i+0, j );
+ i += 1;
+ }
+ else if (m == 2) {
+ write_twobit_array( lineZ->ix2s, i+0, j );
+ write_twobit_array( lineZ->ix2s, i+1, j );
+ i += 2;
+ }
+ else {
+ tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
+ }
+
+ }
+
+ if (LIKELY(i == N_LINE_ARANGE)) {
+ /* Construction of the compressed representation was
+ successful. */
+ rcinc_LineZ(lineZ);
+ stats__cache_Z_wbacks++;
+ } else {
+ /* Cannot use the compressed(z) representation. Use the full(f)
+ rep instead. */
+ tl_assert(i >= 0 && i < N_LINE_ARANGE);
+ alloc_F_for_writing( sm, &fix );
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
+ lineF = &sm->linesF[fix];
+ tl_assert(!lineF->inUse);
+ lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
+ lineZ->dict[1] = (SVal)fix;
+ lineF->inUse = True;
+ i = 0;
+ for (k = 0; k < csvalsUsed; k++) {
+ if (CHECK_ZSM)
+ tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
+ sv = csvals[k].sval;
+ if (CHECK_ZSM)
+ tl_assert(sv != SVal_INVALID);
+ for (m = csvals[k].count; m > 0; m--) {
+ lineF->w64s[i] = sv;
+ i++;
+ }
+ }
+ tl_assert(i == N_LINE_ARANGE);
+ rcinc_LineF(lineF);
+ stats__cache_F_wbacks++;
+ }
+}
+
+/* Fetch the cacheline 'wix' from the backing store. The tag
+ associated with 'wix' is assumed to have already been filled in;
+ hence that is used to determine where in the backing store to read
+ from. */
+static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
+{
+ Word i;
+ Addr tag;
+ CacheLine* cl;
+ LineZ* lineZ;
+ LineF* lineF;
+
+ if (0)
+ VG_(printf)("scache fetch line %d\n", (Int)wix);
+
+ tl_assert(wix >= 0 && wix < N_WAY_NENT);
+
+ tag = cache_shmem.tags0[wix];
+ cl = &cache_shmem.lyns0[wix];
+
+ /* reject nonsense requests */
+ tl_assert(is_valid_scache_tag(tag));
+
+ lineZ = NULL;
+ lineF = NULL;
+ find_ZF_for_reading( &lineZ, &lineF, tag );
+ tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
+
+ /* expand the data into the bottom layer of the tree, then get
+ cacheline_normalise to build the descriptor array. */
+ if (lineF) {
+ tl_assert(lineF->inUse);
+ for (i = 0; i < N_LINE_ARANGE; i++) {
+ cl->svals[i] = lineF->w64s[i];
+ }
+ stats__cache_F_fetches++;
+ } else {
+ for (i = 0; i < N_LINE_ARANGE; i++) {
+ SVal sv;
+ UWord ix = read_twobit_array( lineZ->ix2s, i );
+ /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
+ sv = lineZ->dict[ix];
+ tl_assert(sv != SVal_INVALID);
+ cl->svals[i] = sv;
+ }
+ stats__cache_Z_fetches++;
+ }
+ normalise_CacheLine( cl );
+}
+
+static void shmem__invalidate_scache ( void ) {
+ Word wix;
+ if (0) VG_(printf)("%s","scache inval\n");
+ tl_assert(!is_valid_scache_tag(1));
+ for (wix = 0; wix < N_WAY_NENT; wix++) {
+ cache_shmem.tags0[wix] = 1/*INVALID*/;
+ }
+ stats__cache_invals++;
+}
+
+static void shmem__flush_and_invalidate_scache ( void ) {
+ Word wix;
+ Addr tag;
+ if (0) VG_(printf)("%s","scache flush and invalidate\n");
+ tl_assert(!is_valid_scache_tag(1));
+ for (wix = 0; wix < N_WAY_NENT; wix++) {
+ tag = cache_shmem.tags0[wix];
+ if (tag == 1/*INVALID*/) {
+ /* already invalid; nothing to do */
+ } else {
+ tl_assert(is_valid_scache_tag(tag));
+ cacheline_wback( wix );
+ }
+ cache_shmem.tags0[wix] = 1/*INVALID*/;
+ }
+ stats__cache_flushes++;
+ stats__cache_invals++;
+}
+
+
+static inline Bool aligned16 ( Addr a ) {
+ return 0 == (a & 1);
+}
+static inline Bool aligned32 ( Addr a ) {
+ return 0 == (a & 3);
+}
+static inline Bool aligned64 ( Addr a ) {
+ return 0 == (a & 7);
+}
+static inline UWord get_cacheline_offset ( Addr a ) {
+ return (UWord)(a & (N_LINE_ARANGE - 1));
+}
+static inline Addr cacheline_ROUNDUP ( Addr a ) {
+ return ROUNDUP(a, N_LINE_ARANGE);
+}
+static inline Addr cacheline_ROUNDDN ( Addr a ) {
+ return ROUNDDN(a, N_LINE_ARANGE);
+}
+static inline UWord get_treeno ( Addr a ) {
+ return get_cacheline_offset(a) >> 3;
+}
+static inline UWord get_tree_offset ( Addr a ) {
+ return a & 7;
+}
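+
+/* For example, with N_LINE_BITS == 6 (64-byte lines), address 0x5013
+   has cacheline offset 0x13 (byte 19 of the line), falls in tree
+   number 2 (bytes 16..23 of the line), and has tree offset 3. */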
+
+static __attribute__((noinline))
+ CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
+static inline CacheLine* get_cacheline ( Addr a )
+{
+   /* tag is 'a' with the in-line offset masked out,
+      eg a[31]..a[6] 000000 */
+ Addr tag = a & ~(N_LINE_ARANGE - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+ stats__cache_totrefs++;
+ if (LIKELY(tag == cache_shmem.tags0[wix])) {
+ return &cache_shmem.lyns0[wix];
+ } else {
+ return get_cacheline_MISS( a );
+ }
+}
+
+static __attribute__((noinline))
+ CacheLine* get_cacheline_MISS ( Addr a )
+{
+   /* tag is 'a' with the in-line offset masked out,
+      eg a[31]..a[6] 000000 */
+
+ CacheLine* cl;
+ Addr* tag_old_p;
+ Addr tag = a & ~(N_LINE_ARANGE - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+
+ tl_assert(tag != cache_shmem.tags0[wix]);
+
+ /* Dump the old line into the backing store. */
+ stats__cache_totmisses++;
+
+ cl = &cache_shmem.lyns0[wix];
+ tag_old_p = &cache_shmem.tags0[wix];
+
+ if (is_valid_scache_tag( *tag_old_p )) {
+ /* EXPENSIVE and REDUNDANT: callee does it */
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ cacheline_wback( wix );
+ }
+ /* and reload the new one */
+ *tag_old_p = tag;
+ cacheline_fetch( wix );
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ return cl;
+}
+
+static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+ stats__cline_64to32pulldown++;
+ switch (toff) {
+ case 0: case 4:
+ tl_assert(descr & TREE_DESCR_64);
+ tree[4] = tree[0];
+ descr &= ~TREE_DESCR_64;
+ descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
+static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+ stats__cline_32to16pulldown++;
+ switch (toff) {
+ case 0: case 2:
+ if (!(descr & TREE_DESCR_32_0)) {
+ descr = pulldown_to_32(tree, 0, descr);
+ }
+ tl_assert(descr & TREE_DESCR_32_0);
+ tree[2] = tree[0];
+ descr &= ~TREE_DESCR_32_0;
+ descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
+ break;
+ case 4: case 6:
+ if (!(descr & TREE_DESCR_32_1)) {
+ descr = pulldown_to_32(tree, 4, descr);
+ }
+ tl_assert(descr & TREE_DESCR_32_1);
+ tree[6] = tree[4];
+ descr &= ~TREE_DESCR_32_1;
+ descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
+static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
+ stats__cline_16to8pulldown++;
+ switch (toff) {
+ case 0: case 1:
+ if (!(descr & TREE_DESCR_16_0)) {
+ descr = pulldown_to_16(tree, 0, descr);
+ }
+ tl_assert(descr & TREE_DESCR_16_0);
+ tree[1] = tree[0];
+ descr &= ~TREE_DESCR_16_0;
+ descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
+ break;
+ case 2: case 3:
+ if (!(descr & TREE_DESCR_16_1)) {
+ descr = pulldown_to_16(tree, 2, descr);
+ }
+ tl_assert(descr & TREE_DESCR_16_1);
+ tree[3] = tree[2];
+ descr &= ~TREE_DESCR_16_1;
+ descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
+ break;
+ case 4: case 5:
+ if (!(descr & TREE_DESCR_16_2)) {
+ descr = pulldown_to_16(tree, 4, descr);
+ }
+ tl_assert(descr & TREE_DESCR_16_2);
+ tree[5] = tree[4];
+ descr &= ~TREE_DESCR_16_2;
+ descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
+ break;
+ case 6: case 7:
+ if (!(descr & TREE_DESCR_16_3)) {
+ descr = pulldown_to_16(tree, 6, descr);
+ }
+ tl_assert(descr & TREE_DESCR_16_3);
+ tree[7] = tree[6];
+ descr &= ~TREE_DESCR_16_3;
+ descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
+
+static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
+ UShort mask;
+ switch (toff) {
+ case 0:
+ mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
+ tl_assert( (descr & mask) == mask );
+ descr &= ~mask;
+ descr |= TREE_DESCR_16_0;
+ break;
+ case 2:
+ mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
+ tl_assert( (descr & mask) == mask );
+ descr &= ~mask;
+ descr |= TREE_DESCR_16_1;
+ break;
+ case 4:
+ mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
+ tl_assert( (descr & mask) == mask );
+ descr &= ~mask;
+ descr |= TREE_DESCR_16_2;
+ break;
+ case 6:
+ mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
+ tl_assert( (descr & mask) == mask );
+ descr &= ~mask;
+ descr |= TREE_DESCR_16_3;
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
+static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
+ UShort mask;
+ switch (toff) {
+ case 0:
+ if (!(descr & TREE_DESCR_16_0))
+ descr = pullup_descr_to_16(descr, 0);
+ if (!(descr & TREE_DESCR_16_1))
+ descr = pullup_descr_to_16(descr, 2);
+ mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
+ tl_assert( (descr & mask) == mask );
+ descr &= ~mask;
+ descr |= TREE_DESCR_32_0;
+ break;
+ case 4:
+ if (!(descr & TREE_DESCR_16_2))
+ descr = pullup_descr_to_16(descr, 4);
+ if (!(descr & TREE_DESCR_16_3))
+ descr = pullup_descr_to_16(descr, 6);
+ mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
+ tl_assert( (descr & mask) == mask );
+ descr &= ~mask;
+ descr |= TREE_DESCR_32_1;
+ break;
+ default:
+ tl_assert(0);
+ }
+ return descr;
+}
+
+static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
+ switch (toff) {
+ case 0: case 4:
+ return 0 != (descr & TREE_DESCR_64);
+ default:
+ tl_assert(0);
+ }
+}
+
+static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
+ switch (toff) {
+ case 0:
+ return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
+ case 2:
+ return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
+ case 4:
+ return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
+ case 6:
+ return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
+ default:
+ tl_assert(0);
+ }
+}
+
+/* ------------ Cache management ------------ */
+
+static void zsm_flush_cache ( void )
+{
+ shmem__flush_and_invalidate_scache();
+}
+
+
+static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
+{
+ tl_assert( sizeof(UWord) == sizeof(Addr) );
+
+ rcinc = p_rcinc;
+ rcdec = p_rcdec;
+
+ tl_assert(map_shmem == NULL);
+ map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
+ HG_(free),
+ NULL/*unboxed UWord cmp*/);
+ tl_assert(map_shmem != NULL);
+ shmem__invalidate_scache();
+
+ /* a SecMap must contain an integral number of CacheLines */
+ tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
+ /* also ... a CacheLine holds an integral number of trees */
+ tl_assert(0 == (N_LINE_ARANGE % 8));
+}
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION END compressed shadow memory //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN vts primitives //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+#ifndef __HB_VTS_H
+#define __HB_VTS_H
+
+/* VtsIDs can't exceed 30 bits, since they have to be packed into the
+ lowest 30 bits of an SVal. */
+typedef UInt VtsID;
+#define VtsID_INVALID 0xFFFFFFFF
+
+/* A VTS contains .ts, its vector clock, and also .id, a field to hold
+ a backlink for the caller's convenience. Since we have no idea
+ what to set that to in the library, it always gets set to
+ VtsID_INVALID. */
+typedef
+ struct {
+ VtsID id;
+ XArray* ts; /* XArray* ScalarTS(abstract) */
+ }
+ VTS;
+
+
+/* Create a new, empty VTS. */
+VTS* VTS__new ( void );
+
+/* Delete this VTS in its entirety. */
+void VTS__delete ( VTS* vts );
+
+/* Create a new singleton VTS. */
+VTS* VTS__singleton ( Thr* thr, ULong tym );
+
+/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
+ not modified. */
+VTS* VTS__tick ( Thr* me, VTS* vts );
+
+/* Return a new VTS constructed as the join (max) of the 2 args.
+ Neither arg is modified. */
+VTS* VTS__join ( VTS* a, VTS* b );
+
+/* Compute the partial ordering relation of the two args. */
+typedef
+ enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
+ POrd;
+
+POrd VTS__cmp ( VTS* a, VTS* b );
+
+/* Compute an arbitrary structural (total) ordering on the two args,
+ based on their VCs, so they can be looked up in a table, tree, etc.
+ Returns -1, 0 or 1. */
+Word VTS__cmp_structural ( VTS* a, VTS* b );
+
+/* Debugging only. Display the given VTS in the buffer. */
+void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
+
+/* Debugging only. Return vts[index], so to speak. */
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
+
+#endif /* ! __HB_VTS_H */
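+
+/* A minimal sketch of the intended vector-clock semantics, assuming
+   t1 and t2 are two distinct threads supplied by the caller (this
+   example function is illustrative and not used by the library):
+   singleton VTSs for distinct threads are unordered, and their join
+   dominates both. */
+__attribute__((unused))
+static void example_vts_semantics ( Thr* t1, Thr* t2 )
+{
+   VTS *a, *b, *j;
+   a = VTS__singleton( t1, 1 );               /* a = { t1:1 } */
+   b = VTS__singleton( t2, 1 );               /* b = { t2:1 } */
+   tl_assert(VTS__cmp( a, b ) == POrd_UN);    /* neither dominates */
+   j = VTS__join( a, b );                     /* j = { t1:1, t2:1 } */
+   tl_assert(VTS__cmp( a, j ) == POrd_LT);    /* a is dominated by j */
+   tl_assert(VTS__cmp( j, b ) == POrd_GT);    /* j dominates b */
+   VTS__delete(a);  VTS__delete(b);  VTS__delete(j);
+}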
+
+
+/*--------------- to do with Vector Timestamps ---------------*/
+
+/* Scalar Timestamp */
+typedef
+ struct {
+ Thr* thr;
+ ULong tym;
+ }
+ ScalarTS;
+
+
+static Bool is_sane_VTS ( VTS* vts )
+{
+ UWord i, n;
+ ScalarTS *st1, *st2;
+ if (!vts) return False;
+ if (!vts->ts) return False;
+ n = VG_(sizeXA)( vts->ts );
+ if (n >= 2) {
+ for (i = 0; i < n-1; i++) {
+ st1 = VG_(indexXA)( vts->ts, i );
+ st2 = VG_(indexXA)( vts->ts, i+1 );
+ if (st1->thr >= st2->thr)
+ return False;
+ if (st1->tym == 0 || st2->tym == 0)
+ return False;
+ }
+ }
+ return True;
+}
+
+
+/* Create a new, empty VTS.
+*/
+VTS* VTS__new ( void )
+{
+ VTS* vts;
+ vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
+ tl_assert(vts);
+ vts->id = VtsID_INVALID;
+ vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
+ HG_(free), sizeof(ScalarTS) );
+ tl_assert(vts->ts);
+ return vts;
+}
+
+
+/* Delete this VTS in its entirety.
+*/
+void VTS__delete ( VTS* vts )
+{
+ tl_assert(vts);
+ tl_assert(vts->ts);
+ VG_(deleteXA)( vts->ts );
+ HG_(free)(vts);
+}
+
+
+/* Create a new singleton VTS.
+*/
+VTS* VTS__singleton ( Thr* thr, ULong tym ) {
+ ScalarTS st;
+ VTS* vts;
+ tl_assert(thr);
+ tl_assert(tym >= 1);
+ vts = VTS__new();
+ st.thr = thr;
+ st.tym = tym;
+ VG_(addToXA)( vts->ts, &st );
+ return vts;
+}
+
+
+/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
+ not modified.
+*/
+VTS* VTS__tick ( Thr* me, VTS* vts )
+{
+ ScalarTS* here = NULL;
+ ScalarTS tmp;
+ VTS* res;
+ Word i, n;
+ tl_assert(me);
+ tl_assert(is_sane_VTS(vts));
+ //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
+ // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
+ res = VTS__new();
+ n = VG_(sizeXA)( vts->ts );
+
+ /* main loop doesn't handle zero-entry case correctly, so
+ special-case it. */
+ if (n == 0) {
+ tmp.thr = me;
+ tmp.tym = 1;
+ VG_(addToXA)( res->ts, &tmp );
+ tl_assert(is_sane_VTS(res));
+ return res;
+ }
+
+ for (i = 0; i < n; i++) {
+ here = VG_(indexXA)( vts->ts, i );
+ if (me < here->thr) {
+ /* We just went past 'me', without seeing it. */
+ tmp.thr = me;
+ tmp.tym = 1;
+ VG_(addToXA)( res->ts, &tmp );
+ tmp = *here;
+ VG_(addToXA)( res->ts, &tmp );
+ i++;
+ break;
+ }
+ else if (me == here->thr) {
+ tmp = *here;
+ tmp.tym++;
+ VG_(addToXA)( res->ts, &tmp );
+ i++;
+ break;
+ }
+ else /* me > here->thr */ {
+ tmp = *here;
+ VG_(addToXA)( res->ts, &tmp );
+ }
+ }
+ tl_assert(i >= 0 && i <= n);
+ if (i == n && here && here->thr < me) {
+ tmp.thr = me;
+ tmp.tym = 1;
+ VG_(addToXA)( res->ts, &tmp );
+ } else {
+ for (/*keepgoing*/; i < n; i++) {
+ here = VG_(indexXA)( vts->ts, i );
+ tmp = *here;
+ VG_(addToXA)( res->ts, &tmp );
+ }
+ }
+ tl_assert(is_sane_VTS(res));
+ //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
+ // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
+ return res;
+}
+
+
+/* Return a new VTS constructed as the join (max) of the 2 args.
+ Neither arg is modified.
+*/
+VTS* VTS__join ( VTS* a, VTS* b )
+{
+ Word ia, ib, useda, usedb;
+ ULong tyma, tymb, tymMax;
+ Thr* thr;
+ VTS* res;
+
+ tl_assert(a && a->ts);
+ tl_assert(b && b->ts);
+ useda = VG_(sizeXA)( a->ts );
+ usedb = VG_(sizeXA)( b->ts );
+
+ res = VTS__new();
+ ia = ib = 0;
+
+ while (1) {
+
+ /* This logic is to enumerate triples (thr, tyma, tymb) drawn
+ from a and b in order, where thr is the next Thr*
+ occurring in either a or b, and tyma/b are the relevant
+ scalar timestamps, taking into account implicit zeroes. */
+ tl_assert(ia >= 0 && ia <= useda);
+ tl_assert(ib >= 0 && ib <= usedb);
+
+ if (ia == useda && ib == usedb) {
+ /* both empty - done */
+ break;
+
+ } else if (ia == useda && ib != usedb) {
+ /* a empty, use up b */
+ ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ thr = tmpb->thr;
+ tyma = 0;
+ tymb = tmpb->tym;
+ ib++;
+
+ } else if (ia != useda && ib == usedb) {
+ /* b empty, use up a */
+ ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
+ thr = tmpa->thr;
+ tyma = tmpa->tym;
+ tymb = 0;
+ ia++;
+
+ } else {
+ /* both not empty; extract lowest-Thr*'d triple */
+ ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
+ ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ if (tmpa->thr < tmpb->thr) {
+ /* a has the lowest unconsidered Thr* */
+ thr = tmpa->thr;
+ tyma = tmpa->tym;
+ tymb = 0;
+ ia++;
+ } else if (tmpa->thr > tmpb->thr) {
+ /* b has the lowest unconsidered Thr* */
+ thr = tmpb->thr;
+ tyma = 0;
+ tymb = tmpb->tym;
+ ib++;
+ } else {
+ /* they both next mention the same Thr* */
+ tl_assert(tmpa->thr == tmpb->thr);
+ thr = tmpa->thr; /* == tmpb->thr */
+ tyma = tmpa->tym;
+ tymb = tmpb->tym;
+ ia++;
+ ib++;
+ }
+ }
+
+ /* having laboriously determined (thr, tyma, tymb), do something
+ useful with it. */
+ tymMax = tyma > tymb ? tyma : tymb;
+ if (tymMax > 0) {
+ ScalarTS st;
+ st.thr = thr;
+ st.tym = tymMax;
+ VG_(addToXA)( res->ts, &st );
+ }
+
+ }
+
+ tl_assert(is_sane_VTS( res ));
+
+ return res;
+}
+
+
+/* Compute the partial ordering relation of the two args.
+*/
+POrd VTS__cmp ( VTS* a, VTS* b )
+{
+ Word ia, ib, useda, usedb;
+ ULong tyma, tymb;
+
+ Bool all_leq = True;
+ Bool all_geq = True;
+
+ tl_assert(a && a->ts);
+ tl_assert(b && b->ts);
+ useda = VG_(sizeXA)( a->ts );
+ usedb = VG_(sizeXA)( b->ts );
+
+ ia = ib = 0;
+
+ while (1) {
+
+      /* This logic is to enumerate pairs (tyma, tymb) drawn
+         from a and b in order, where tyma/b are the relevant
+         scalar timestamps, taking into account implicit zeroes. */
+ tl_assert(ia >= 0 && ia <= useda);
+ tl_assert(ib >= 0 && ib <= usedb);
+
+ if (ia == useda && ib == usedb) {
+ /* both empty - done */
+ break;
+
+ } else if (ia == useda && ib != usedb) {
+ /* a empty, use up b */
+ ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ tyma = 0;
+ tymb = tmpb->tym;
+ ib++;
+
+ } else if (ia != useda && ib == usedb) {
+ /* b empty, use up a */
+ ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
+ tyma = tmpa->tym;
+ tymb = 0;
+ ia++;
+
+ } else {
+         /* both not empty; extract lowest-Thr*'d pair */
+ ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
+ ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ if (tmpa->thr < tmpb->thr) {
+ /* a has the lowest unconsidered Thr* */
+ tyma = tmpa->tym;
+ tymb = 0;
+ ia++;
+ }
+ else
+ if (tmpa->thr > tmpb->thr) {
+ /* b has the lowest unconsidered Thr* */
+ tyma = 0;
+ tymb = tmpb->tym;
+ ib++;
+ } else {
+ /* they both next mention the same Thr* */
+ tl_assert(tmpa->thr == tmpb->thr);
+ tyma = tmpa->tym;
+ tymb = tmpb->tym;
+ ia++;
+ ib++;
+ }
+ }
+
+ /* having laboriously determined (tyma, tymb), do something
+ useful with it. */
+ if (tyma < tymb)
+ all_geq = False;
+ if (tyma > tymb)
+ all_leq = False;
+ }
+
+ if (all_leq && all_geq)
+ return POrd_EQ;
+ /* now we know they aren't equal, so either all_leq or all_geq or
+ both are false. */
+ if (all_leq)
+ return POrd_LT;
+ if (all_geq)
+ return POrd_GT;
+ /* hmm, neither all_geq or all_leq. This means unordered. */
+ return POrd_UN;
+}
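+
+/* For example, writing vector clocks as {thr:clock, ...}:
+   {T1:2, T2:1} vs {T1:1, T2:1} is POrd_GT (pointwise >=, not equal);
+   {T1:2, T2:1} vs {T1:1, T2:3} is POrd_UN (neither dominates). */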
+
+
+/* Compute an arbitrary structural (total) ordering on the two args,
+ based on their VCs, so they can be looked up in a table, tree, etc.
+ Returns -1, 0 or 1. (really just 'deriving Ord' :-)
+*/
+Word VTS__cmp_structural ( VTS* a, VTS* b )
+{
+   /* We just need to generate an arbitrary total ordering based on
+      a->ts and b->ts.  Preferably do it in a way which exposes likely
+      differences relatively quickly. */
+ Word i, useda, usedb;
+ ScalarTS *tmpa, *tmpb;
+
+ tl_assert(a && a->ts);
+ tl_assert(b && b->ts);
+ useda = VG_(sizeXA)( a->ts );
+ usedb = VG_(sizeXA)( b->ts );
+
+ if (useda < usedb) return -1;
+ if (useda > usedb) return 1;
+
+ /* Same length vectors, so let's step through them together. */
+ tl_assert(useda == usedb);
+ for (i = 0; i < useda; i++) {
+ tmpa = VG_(indexXA)( a->ts, i );
+ tmpb = VG_(indexXA)( b->ts, i );
+ if (tmpa->tym < tmpb->tym) return -1;
+ if (tmpa->tym > tmpb->tym) return 1;
+ if (tmpa->thr < tmpb->thr) return -1;
+ if (tmpa->thr > tmpb->thr) return 1;
+ }
+
+ /* They're identical. */
+ return 0;
+}
+
+
+/* Debugging only. Display the given VTS in the buffer.
+*/
+void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
+ ScalarTS* st;
+ HChar unit[64];
+ Word i, n;
+ Int avail = nBuf;
+ tl_assert(vts && vts->ts);
+ tl_assert(nBuf > 16);
+ buf[0] = '[';
+ buf[1] = 0;
+ n = VG_(sizeXA)( vts->ts );
+ for (i = 0; i < n; i++) {
+ tl_assert(avail >= 40);
+ st = VG_(indexXA)( vts->ts, i );
+ VG_(memset)(unit, 0, sizeof(unit));
+ VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
+ st->thr, st->tym);
+ if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
+ VG_(strcat)(buf, " ...]");
+ buf[nBuf-1] = 0;
+ return;
+ }
+ VG_(strcat)(buf, unit);
+ avail -= VG_(strlen)(unit);
+ }
+ VG_(strcat)(buf, "]");
+ buf[nBuf-1] = 0;
+}
+
+
+/* Debugging only. Return vts[index], so to speak.
+*/
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
+ UWord i, n;
+ tl_assert(vts && vts->ts);
+ n = VG_(sizeXA)( vts->ts );
+ for (i = 0; i < n; i++) {
+ ScalarTS* st = VG_(indexXA)( vts->ts, i );
+ if (st->thr == idx)
+ return st->tym;
+ }
+ return 0;
+}
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION END vts primitives //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN main library //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////
+// //
+// VTS set //
+// //
+/////////////////////////////////////////////////////////
+
+static WordFM* /* VTS* void void */ vts_set = NULL;
+
+static void vts_set_init ( void )
+{
+ tl_assert(!vts_set);
+ vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
+ HG_(free),
+ (Word(*)(UWord,UWord))VTS__cmp_structural );
+ tl_assert(vts_set);
+}
+
+/* Given a newly made VTS, look in vts_set to see if we already have
+ an identical one. If yes, free up this one and return instead a
+ pointer to the existing one. If no, add this one to the set and
+ return the same pointer. Caller differentiates the two cases by
+ comparing returned pointer with the supplied one (although that
+ does require that the supplied VTS is not already in the set).
+*/
+static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
+{
+ UWord keyW, valW;
+ /* lookup cand (by value) */
+ if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
+ /* found it */
+ tl_assert(valW == 0);
+ /* if this fails, cand (by ref) was already present (!) */
+ tl_assert(keyW != (UWord)cand);
+ VTS__delete(cand);
+ return (VTS*)keyW;
+ } else {
+ /* not present. Add and return pointer to same. */
+ VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
+ return cand;
+ }
+}
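+
+/* A minimal usage sketch of the interning scheme above (illustrative
+   only; the real callers go through vts_tab__find_and_dealloc__or_add
+   below, which additionally assigns a VtsID):
+
+      VTS* nyu  = VTS__join( a, b );
+      VTS* uniq = vts_set__find_and_dealloc__or_add( nyu );
+      // if uniq != nyu, then nyu has been freed and uniq is the
+      // canonical, already-interned copy
+*/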
+
+
+/////////////////////////////////////////////////////////
+// //
+// VTS table //
+// //
+/////////////////////////////////////////////////////////
+
+static void VtsID__invalidate_caches ( void ); /* fwds */
+
+/* A type to hold VTS table entries.  Invariants:
+   If .vts == NULL, then this entry is not in use, so:
+   - .rc == 0
+   - this entry is on the freelist (unfortunately, does not imply
+     any constraints on the value of .freelink)
+   If .vts != NULL, then this entry is in use:
+   - .vts is findable in vts_set
+   - .vts->id == this entry number
+   - no specific value for .rc (even 0 is OK)
+   - this entry is not on freelist, so .freelink == VtsID_INVALID
+*/
+typedef
+ struct {
+ VTS* vts; /* vts, in vts_set */
+ UWord rc; /* reference count - enough for entire aspace */
+ VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
+ }
+ VtsTE;
+
+/* The VTS table. */
+static XArray* /* of VtsTE */ vts_tab = NULL;
+
+/* An index into the VTS table, indicating the start of the list of
+ free (available for use) entries. If the list is empty, this is
+ VtsID_INVALID. */
+static VtsID vts_tab_freelist = VtsID_INVALID;
+
+/* Do a GC of vts_tab when the freelist becomes empty AND the size of
+ vts_tab equals or exceeds this size. After GC, the value here is
+ set appropriately so as to check for the next GC point. */
+static Word vts_next_GC_at = 1000;
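+
+/* Worked example of that rule (illustrative numbers): if a GC runs
+   when vts_tab holds 4000 entries and frees 3000 of them, nLive is
+   1000, so vts_next_GC_at becomes max(2 * 1000, 4000) = 4000; since
+   the table never shrinks, the next GC can only happen once the
+   3000-entry freelist has been used up again. */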
+
+static void vts_tab_init ( void )
+{
+ vts_tab
+ = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
+ HG_(free), sizeof(VtsTE) );
+ vts_tab_freelist
+ = VtsID_INVALID;
+ tl_assert(vts_tab);
+}
+
+/* Add ii to the free list, checking that it looks out-of-use. */
+static void add_to_free_list ( VtsID ii )
+{
+ VtsTE* ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts == NULL);
+ tl_assert(ie->rc == 0);
+ tl_assert(ie->freelink == VtsID_INVALID);
+ ie->freelink = vts_tab_freelist;
+ vts_tab_freelist = ii;
+}
+
+/* Get an entry from the free list. This will return VtsID_INVALID if
+ the free list is empty. */
+static VtsID get_from_free_list ( void )
+{
+ VtsID ii;
+ VtsTE* ie;
+ if (vts_tab_freelist == VtsID_INVALID)
+ return VtsID_INVALID;
+ ii = vts_tab_freelist;
+ ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts == NULL);
+ tl_assert(ie->rc == 0);
+ vts_tab_freelist = ie->freelink;
+ return ii;
+}
+
+/* Produce a new VtsID that can be used, either by getting it from
+ the freelist, or, if that is empty, by expanding vts_tab. */
+static VtsID get_new_VtsID ( void )
+{
+ VtsID ii;
+ VtsTE te;
+ ii = get_from_free_list();
+ if (ii != VtsID_INVALID)
+ return ii;
+ te.vts = NULL;
+ te.rc = 0;
+ te.freelink = VtsID_INVALID;
+ ii = (VtsID)VG_(addToXA)( vts_tab, &te );
+ return ii;
+}
+
+
+/* Indirect callback from lib_zsm. */
+static void VtsID__rcinc ( VtsID ii )
+{
+ VtsTE* ie;
+ /* VG_(indexXA) does a range check for us */
+ ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts); /* else it's not in use */
+ tl_assert(ie->rc < ~0UL); /* else we can't continue */
+ tl_assert(ie->vts->id == ii);
+ ie->rc++;
+}
+
+/* Indirect callback from lib_zsm. */
+static void VtsID__rcdec ( VtsID ii )
+{
+ VtsTE* ie;
+ /* VG_(indexXA) does a range check for us */
+ ie = VG_(indexXA)( vts_tab, ii );
+ tl_assert(ie->vts); /* else it's not in use */
+ tl_assert(ie->rc > 0); /* else RC snafu */
+ tl_assert(ie->vts->id == ii);
+ ie->rc--;
+}
+
+
+/* Look up 'cand' in our collection of VTSs. If present, deallocate
+ it and return the VtsID for the pre-existing version. If not
+ present, add it to both vts_tab and vts_set, allocate a fresh VtsID
+ for it, and return that. */
+static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
+{
+ VTS* auld;
+ tl_assert(cand->id == VtsID_INVALID);
+ auld = vts_set__find_and_dealloc__or_add(cand);
+ if (auld != cand) {
+ /* We already have an Aulde one. Use that. */
+ VtsTE* ie;
+ tl_assert(auld->id != VtsID_INVALID);
+ ie = VG_(indexXA)( vts_tab, auld->id );
+ tl_assert(ie->vts == auld);
+ return auld->id;
+ } else {
+ VtsID ii = get_new_VtsID();
+ VtsTE* ie = VG_(indexXA)( vts_tab, ii );
+ ie->vts = cand;
+ ie->rc = 0;
+ ie->freelink = VtsID_INVALID;
+ cand->id = ii;
+ return ii;
+ }
+}
+
+
+static void show_vts_stats ( HChar* caller )
+{
+ UWord nSet, nTab, nLive;
+ ULong totrc;
+ UWord n, i;
+ nSet = VG_(sizeFM)( vts_set );
+ nTab = VG_(sizeXA)( vts_tab );
+ totrc = 0;
+ nLive = 0;
+ n = VG_(sizeXA)( vts_tab );
+ for (i = 0; i < n; i++) {
+ VtsTE* ie = VG_(indexXA)( vts_tab, i );
+ if (ie->vts) {
+ nLive++;
+ totrc += (ULong)ie->rc;
+ } else {
+ tl_assert(ie->rc == 0);
+ }
+ }
+ VG_(printf)(" show_vts_stats %s\n", caller);
+ VG_(printf)(" vts_tab size %4lu\n", nTab);
+ VG_(printf)(" vts_tab live %4lu\n", nLive);
+ VG_(printf)(" vts_set size %4lu\n", nSet);
+ VG_(printf)(" total rc %4llu\n", totrc);
+}
+
+/* NOT TO BE CALLED FROM WITHIN libzsm. */
+__attribute__((noinline))
+static void vts_tab__do_GC ( Bool show_stats )
+{
+ UWord i, nTab, nLive, nFreed;
+
+ /* check this is actually necessary. */
+ tl_assert(vts_tab_freelist == VtsID_INVALID);
+
+ /* empty the caches for partial order checks and binary joins. We
+ could do better and prune out the entries to be deleted, but it
+ ain't worth the hassle. */
+ VtsID__invalidate_caches();
+
+ /* First, make the reference counts up to date. */
+ zsm_flush_cache();
+
+ nTab = VG_(sizeXA)( vts_tab );
+
+ if (show_stats) {
+ VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
+ show_vts_stats("before GC");
+ }
+
+   /* Now we can inspect the entire vts_tab.  Any entries with zero
+      .rc fields are no longer in use and can be put back on the
+      free list, removed from vts_set, and deleted. */
+ nFreed = 0;
+ for (i = 0; i < nTab; i++) {
+ Bool present;
+ UWord oldK = 0, oldV = 0;
+ VtsTE* te = VG_(indexXA)( vts_tab, i );
+ if (te->vts == NULL) {
+ tl_assert(te->rc == 0);
+ continue; /* already on the free list (presumably) */
+ }
+ if (te->rc > 0)
+ continue; /* in use */
+ /* Ok, we got one we can free. */
+ tl_assert(te->vts->id == i);
+ /* first, remove it from vts_set. */
+ present = VG_(delFromFM)( vts_set,
+ &oldK, &oldV, (UWord)te->vts );
+ tl_assert(present); /* else it isn't in vts_set ?! */
+ tl_assert(oldV == 0); /* no info stored in vts_set val fields */
+ tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
+ /* now free the VTS itself */
+ VTS__delete(te->vts);
+ te->vts = NULL;
+ /* and finally put this entry on the free list */
+ tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
+ add_to_free_list( i );
+ nFreed++;
+ }
+
+ /* Now figure out when the next GC should be. We'll allow the
+ number of VTSs to double before GCing again. Except of course
+ that since we can't (or, at least, don't) shrink vts_tab, we
+      can't set the threshold value smaller than it. */
+ tl_assert(nFreed <= nTab);
+ nLive = nTab - nFreed;
+ tl_assert(nLive >= 0 && nLive <= nTab);
+ vts_next_GC_at = 2 * nLive;
+ if (vts_next_GC_at < nTab)
+ vts_next_GC_at = nTab;
+
+ if (show_stats) {
+ show_vts_stats("after GC");
+ VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
+ }
+
+ if (VG_(clo_verbosity) > 1) {
+ static UInt ctr = 0;
+ tl_assert(nTab > 0);
+ VG_(message)(Vg_DebugMsg,
+ "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
+ ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
+ }
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Vts IDs //
+// //
+/////////////////////////////////////////////////////////
+
+//////////////////////////
+static ULong stats__getOrdering_queries = 0;
+static ULong stats__getOrdering_misses = 0;
+static ULong stats__join2_queries = 0;
+static ULong stats__join2_misses = 0;
+
+static inline UInt ROL32 ( UInt w, Int n ) {
+ w = (w << n) | (w >> (32-n));
+ return w;
+}
+static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
+ UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
+ return hash % nTab;
+}
+
+#define N_GETORDERING_CACHE 1023
+static
+ struct { VtsID vi1; VtsID vi2; POrd ord; }
+ getOrdering_cache[N_GETORDERING_CACHE];
+
+#define N_JOIN2_CACHE 1023
+static
+ struct { VtsID vi1; VtsID vi2; VtsID res; }
+ join2_cache[N_JOIN2_CACHE];
+
+static void VtsID__invalidate_caches ( void ) {
+ Int i;
+ for (i = 0; i < N_GETORDERING_CACHE; i++) {
+ getOrdering_cache[i].vi1 = VtsID_INVALID;
+ getOrdering_cache[i].vi2 = VtsID_INVALID;
+ getOrdering_cache[i].ord = 0; /* an invalid POrd value */
+ }
+ for (i = 0; i < N_JOIN2_CACHE; i++) {
+ join2_cache[i].vi1 = VtsID_INVALID;
+ join2_cache[i].vi2 = VtsID_INVALID;
+ join2_cache[i].res = VtsID_INVALID;
+ }
+}
+//////////////////////////
+
+//static Bool VtsID__is_valid ( VtsID vi ) {
+// VtsTE* ve;
+// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
+// return False;
+// ve = VG_(indexXA)( vts_tab, vi );
+// if (!ve->vts)
+// return False;
+// tl_assert(ve->vts->id == vi);
+// return True;
+//}
+
+static VTS* VtsID__to_VTS ( VtsID vi ) {
+ VtsTE* te = VG_(indexXA)( vts_tab, vi );
+ tl_assert(te->vts);
+ return te->vts;
+}
+
+static void VtsID__pp ( VtsID vi ) {
+ HChar buf[100];
+ VTS* vts = VtsID__to_VTS(vi);
+ VTS__show( buf, sizeof(buf)-1, vts );
+ buf[sizeof(buf)-1] = 0;
+ VG_(printf)("%s", buf);
+}
+
+/* compute partial ordering relation of vi1 and vi2. */
+__attribute__((noinline))
+static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
+ UInt hash;
+ POrd ord;
+ VTS *v1, *v2;
+ //if (vi1 == vi2) return POrd_EQ;
+ tl_assert(vi1 != vi2);
+ ////++
+ stats__getOrdering_queries++;
+ hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
+ if (getOrdering_cache[hash].vi1 == vi1
+ && getOrdering_cache[hash].vi2 == vi2)
+ return getOrdering_cache[hash].ord;
+ stats__getOrdering_misses++;
+ ////--
+ v1 = VtsID__to_VTS(vi1);
+ v2 = VtsID__to_VTS(vi2);
+ ord = VTS__cmp( v1, v2 );
+ ////++
+ getOrdering_cache[hash].vi1 = vi1;
+ getOrdering_cache[hash].vi2 = vi2;
+ getOrdering_cache[hash].ord = ord;
+ ////--
+ return ord;
+}
+static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
+ return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
+}
+
+/* compute binary join */
+__attribute__((noinline))
+static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
+ UInt hash;
+ VtsID res;
+ VTS *vts1, *vts2, *nyu;
+ //if (vi1 == vi2) return vi1;
+ tl_assert(vi1 != vi2);
+ ////++
+ stats__join2_queries++;
+ hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
+ if (join2_cache[hash].vi1 == vi1
+ && join2_cache[hash].vi2 == vi2)
+ return join2_cache[hash].res;
+ stats__join2_misses++;
+ ////--
+ vts1 = VtsID__to_VTS(vi1);
+ vts2 = VtsID__to_VTS(vi2);
+ nyu = VTS__join(vts1,vts2);
+ res = vts_tab__find_and_dealloc__or_add(nyu);
+ ////++
+ join2_cache[hash].vi1 = vi1;
+ join2_cache[hash].vi2 = vi2;
+ join2_cache[hash].res = res;
+ ////--
+ return res;
+}
+static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
+ return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
+}
+
+/* create a singleton VTS, namely [thr:tym] */
+static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
+ VTS* nyu = VTS__singleton(thr,tym);
+ return vts_tab__find_and_dealloc__or_add(nyu);
+}
+
+/* tick operation, creates value 1 if specified index is absent */
+static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
+ VTS* vts = VtsID__to_VTS(vi);
+ VTS* nyu = VTS__tick(idx,vts);
+ return vts_tab__find_and_dealloc__or_add(nyu);
+}
+
+/* index into a VTS (only for assertions) */
+static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
+ VTS* vts = VtsID__to_VTS(vi);
+ return VTS__indexAt_SLOW( vts, idx );
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Threads //
+// //
+/////////////////////////////////////////////////////////
+
+struct _Thr {
+ /* Current VTSs for this thread. They change as we go along. viR
+ is the VTS to be used for reads, viW for writes. Usually they
+ are the same, but can differ when we deal with reader-writer
+      locks.  It is always the case that VtsID__getOrdering(viW,viR)
+      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
+      lagging behind, viR. */
+ VtsID viR;
+ VtsID viW;
+ /* opaque (to us) data we hold on behalf of the library's user. */
+ void* opaque;
+};
+
+static Thr* Thr__new ( void ) {
+ Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
+ thr->viR = VtsID_INVALID;
+ thr->viW = VtsID_INVALID;
+ return thr;
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Shadow Values //
+// //
+/////////////////////////////////////////////////////////
+
+// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
+// hb_zsm.h. We have to do everything else here.
+
+/* SVal is 64 bit unsigned int.
+
+ <---------30---------> <---------30--------->
+ 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
+ 01 X--------------------X XX X--------------------X E(rror)
+ 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
+ 11 X--------------------X XX X--------------------X I: SVal_INVALID
+*/
+#define SVAL_TAGMASK (3ULL << 62)
+
+static inline Bool SVal__isC ( SVal s ) {
+ return (0ULL << 62) == (s & SVAL_TAGMASK);
+}
+static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
+ //tl_assert(VtsID__is_valid(rmini));
+ //tl_assert(VtsID__is_valid(wmini));
+ return (((ULong)rmini) << 32) | ((ULong)wmini);
+}
+static inline VtsID SVal__unC_Rmin ( SVal s ) {
+ tl_assert(SVal__isC(s));
+ return (VtsID)(s >> 32);
+}
+static inline VtsID SVal__unC_Wmin ( SVal s ) {
+ tl_assert(SVal__isC(s));
+ return (VtsID)(s & 0xFFFFFFFFULL);
+}
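+
+/* Round-trip example for the C encoding (illustrative only):
+   SVal__mkC(3,7) yields 0x0000000300000007ULL, from which
+   SVal__unC_Rmin and SVal__unC_Wmin recover 3 and 7.  The tag bits
+   remain 00 because valid VtsIDs fit in the 30-bit fields shown in
+   the layout diagram above. */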
+
+static Bool SVal__isE ( SVal s ) {
+ return (1ULL << 62) == (s & SVAL_TAGMASK);
+}
+static SVal SVal__mkE ( void ) {
+ return 1ULL << 62;
+}
+
+static Bool SVal__isA ( SVal s ) {
+ return (2ULL << 62) == (s & SVAL_TAGMASK);
+}
+static SVal SVal__mkA ( void ) {
+ return 2ULL << 62;
+}
+
+/* Direct callback from lib_zsm. */
+static void SVal__rcinc ( SVal s ) {
+ if (SVal__isC(s)) {
+ VtsID__rcinc( SVal__unC_Rmin(s) );
+ VtsID__rcinc( SVal__unC_Wmin(s) );
+ }
+}
+
+/* Direct callback from lib_zsm. */
+static void SVal__rcdec ( SVal s ) {
+ if (SVal__isC(s)) {
+ VtsID__rcdec( SVal__unC_Rmin(s) );
+ VtsID__rcdec( SVal__unC_Wmin(s) );
+ }
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// A simple group (memory) allocator //
+// //
+/////////////////////////////////////////////////////////
+
+//////////////// BEGIN general group allocator
+typedef
+ struct {
+ UWord elemSzB; /* element size */
+ UWord nPerGroup; /* # elems per group */
+ void* (*alloc)(HChar*, SizeT); /* group allocator */
+ HChar* cc; /* group allocator's cc */
+ void (*free)(void*); /* group allocator's free-er (unused) */
+ /* XArray of void* (pointers to groups). The groups themselves.
+ Each element is a pointer to a block of size (elemSzB *
+ nPerGroup) bytes. */
+ XArray* groups;
+ /* next free element. Is a pointer to an element in one of the
+ groups pointed to by .groups. */
+ void* nextFree;
+ }
+ GroupAlloc;
+
+static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
+ UWord elemSzB,
+ UWord nPerGroup,
+ void* (*alloc)(HChar*, SizeT),
+ HChar* cc,
+ void (*free)(void*) )
+{
+ tl_assert(0 == (elemSzB % sizeof(UWord)));
+ tl_assert(elemSzB >= sizeof(UWord));
+ tl_assert(nPerGroup >= 100); /* let's say */
+ tl_assert(alloc);
+ tl_assert(cc);
+ tl_assert(free);
+ tl_assert(ga);
+ VG_(memset)(ga, 0, sizeof(*ga));
+ ga->elemSzB = elemSzB;
+ ga->nPerGroup = nPerGroup;
+ ga->groups = NULL;
+ ga->alloc = alloc;
+ ga->cc = cc;
+ ga->free = free;
+ ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
+ ga->nextFree = NULL;
+ tl_assert(ga->groups);
+}
+
+/* The freelist is empty. Allocate a new group and put all the new
+ elements in it onto the freelist. */
+__attribute__((noinline))
+static void gal_add_new_group ( GroupAlloc* ga )
+{
+ Word i;
+ UWord* group;
+ tl_assert(ga);
+ tl_assert(ga->nextFree == NULL);
+ group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
+ tl_assert(group);
+ /* extend the freelist through the new group. Place the freelist
+ pointer in the first word of each element. That's why the
+ element size must be at least one word. */
+ for (i = ga->nPerGroup-1; i >= 0; i--) {
+ UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
+ UWord* elem = (UWord*)elemC;
+ tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
+ *elem = (UWord)ga->nextFree;
+ ga->nextFree = elem;
+ }
+ /* and add to our collection of groups */
+ VG_(addToXA)( ga->groups, &group );
+}
+
+inline static void* gal_Alloc ( GroupAlloc* ga )
+{
+ UWord* elem;
+ if (UNLIKELY(ga->nextFree == NULL)) {
+ gal_add_new_group(ga);
+ }
+ elem = ga->nextFree;
+ ga->nextFree = (void*)*elem;
+ *elem = 0; /* unnecessary, but just to be on the safe side */
+ return elem;
+}
+
+inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
+{
+ tl_assert(n == ga->elemSzB);
+ return gal_Alloc( ga );
+}
+
+inline static void gal_Free ( GroupAlloc* ga, void* p )
+{
+ UWord* elem = (UWord*)p;
+ *elem = (UWord)ga->nextFree;
+ ga->nextFree = elem;
+}
+//////////////// END general group allocator
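+
+/* Sketch of how the group allocator is used (mirrors the RCEC and
+   OldRef allocators set up in event_map_init below; 'MyNode' and the
+   cost-centre string are made up for the example):
+
+      static GroupAlloc ga;
+      init_GroupAlloc( &ga, sizeof(MyNode), 1000,
+                       HG_(zalloc), "example.cc.1", HG_(free) );
+      MyNode* n = gal_Alloc( &ga );   // carve one element off a group
+      ...
+      gal_Free( &ga, n );             // push it back on the freelist
+*/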
+
+
+/////////////////////////////////////////////////////////
+// //
+// Change-event map2 //
+// //
+/////////////////////////////////////////////////////////
+
+#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
+
+/* This is in two parts:
+
+ 1. An OSet of RCECs. This is a set of reference-counted stack
+ traces. When the reference count of a stack trace becomes zero,
+ it is removed from the set and freed up. The intent is to have
+ a set of stack traces which can be referred to from (2), but to
+ only represent each one once. The set is indexed/searched by
+ ordering on the stack trace vectors.
+
+ 2. A SparseWA of OldRefs. These store information about each old
+ ref that we need to record. It is indexed by address of the
+ location for which the information is recorded. For LRU
+ purposes, each OldRef also contains a generation number,
+ indicating when it was most recently accessed.
+
+ The important part of an OldRef is, however, its accs[] array.
+ This is an array of N_OLDREF_ACCS which binds (thread, R/W,
+ size) triples to RCECs. This allows us to collect the last
+ access-traceback by up to N_OLDREF_ACCS different triples for
+ this location. The accs[] array is a MTF-array. If a binding
+ falls off the end, that's too bad -- we will lose info about
+ that triple's access to this location.
+
+ When the SparseWA becomes too big, we can throw away the OldRefs
+ whose generation numbers are below some threshold; hence doing
+ approximate LRU discarding. For each discarded OldRef we must
+      of course decrement the reference count on all the RCECs it
+ refers to, in order that entries from (1) eventually get
+ discarded too.
+
+ A major improvement in reliability of this mechanism would be to
+ have a dynamically sized OldRef.accs[] array, so no entries ever
+ fall off the end. In investigations (Dec 08) it appears that a
+      major cause of the non-availability of conflicting-access traces
+      in race reports is the fixed size of this array.  I
+ suspect for most OldRefs, only a few entries are used, but for a
+ minority of cases there is an overflow, leading to info lossage.
+ Investigations also suggest this is very workload and scheduling
+ sensitive. Therefore a dynamic sizing would be better.
+
+ However, dynamic sizing would defeat the use of a GroupAllocator
+ for OldRef structures. And that's important for performance. So
+ it's not straightforward to do.
+*/
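+
+/* In outline (a summary of the code below, not extra machinery):
+   msm_read/msm_write call event_map_bind(a, szB, isW, thr) whenever a
+   location's constraint changes (and conflict-showing is enabled),
+   which files the current stack trace (an RCEC) under the
+   (thread, R/W, size) triple in that address's OldRef; later,
+   libhb_event_map_lookup walks the OldRefs at nearby addresses to
+   recover a conflicting access for the error report. */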
+
+
+static UWord stats__ctxt_rcdec1 = 0;
+static UWord stats__ctxt_rcdec2 = 0;
+static UWord stats__ctxt_rcdec3 = 0;
+static UWord stats__ctxt_rcdec_calls = 0;
+static UWord stats__ctxt_rcdec_discards = 0;
+static UWord stats__ctxt_rcdec1_eq = 0;
+
+static UWord stats__ctxt_tab_curr = 0;
+static UWord stats__ctxt_tab_max = 0;
+
+static UWord stats__ctxt_tab_qs = 0;
+static UWord stats__ctxt_tab_cmps = 0;
+
+
+///////////////////////////////////////////////////////
+//// Part (1): An OSet of RCECs
+///
+
+#define N_FRAMES 8
+
+// (UInt) `echo "Reference Counted Execution Context" | md5sum`
+#define RCEC_MAGIC 0xab88abb2UL
+
+//#define N_RCEC_TAB 98317 /* prime */
+#define N_RCEC_TAB 196613 /* prime */
+
+typedef
+ struct _RCEC {
+ UWord magic; /* sanity check only */
+ struct _RCEC* next;
+ UWord rc;
+ UWord rcX; /* used for crosschecking */
+ UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
+ }
+ RCEC;
+
+static RCEC** contextTab = NULL; /* hash table of RCEC*s */
+
+
+/* Gives an arbitrary total order on RCEC .frames fields */
+static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
+ Word i;
+ tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
+ tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
+ if (ec1->frames[0] < ec2->frames[0]) return -1;
+ if (ec1->frames[0] > ec2->frames[0]) return 1;
+ for (i = 1; i < 1 + N_FRAMES; i++) {
+ if (ec1->frames[i] < ec2->frames[i]) return -1;
+ if (ec1->frames[i] > ec2->frames[i]) return 1;
+ }
+ return 0;
+}
+
+
+/* Dec the ref of this RCEC. */
+static void ctxt__rcdec ( RCEC* ec )
+{
+ stats__ctxt_rcdec_calls++;
+ tl_assert(ec && ec->magic == RCEC_MAGIC);
+ tl_assert(ec->rc > 0);
+ ec->rc--;
+}
+
+static void ctxt__rcinc ( RCEC* ec )
+{
+ tl_assert(ec && ec->magic == RCEC_MAGIC);
+ ec->rc++;
+}
+
+
+//////////// BEGIN RCEC group allocator
+static GroupAlloc rcec_group_allocator;
+
+static RCEC* alloc_RCEC ( void ) {
+ return gal_Alloc ( &rcec_group_allocator );
+}
+
+static void free_RCEC ( RCEC* rcec ) {
+ tl_assert(rcec->magic == RCEC_MAGIC);
+ gal_Free( &rcec_group_allocator, rcec );
+}
+//////////// END RCEC group allocator
+
+
+/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
+   move it one step closer to the front of the list, so as to make
+ subsequent searches for it cheaper. */
+static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
+{
+ RCEC *ec0, *ec1, *ec2;
+ if (ec == *headp)
+ tl_assert(0); /* already at head of list */
+ tl_assert(ec != NULL);
+ ec0 = *headp;
+ ec1 = NULL;
+ ec2 = NULL;
+ while (True) {
+ if (ec0 == NULL || ec0 == ec) break;
+ ec2 = ec1;
+ ec1 = ec0;
+ ec0 = ec0->next;
+ }
+ tl_assert(ec0 == ec);
+ if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
+ RCEC* tmp;
+ /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
+ predecessor. Swap ec0 and ec1, that is, move ec0 one step
+ closer to the start of the list. */
+ tl_assert(ec2->next == ec1);
+ tl_assert(ec1->next == ec0);
+ tmp = ec0->next;
+ ec2->next = ec0;
+ ec0->next = ec1;
+ ec1->next = tmp;
+ }
+ else
+ if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
+ /* it's second in the list. */
+ tl_assert(*headp == ec1);
+ tl_assert(ec1->next == ec0);
+ ec1->next = ec0->next;
+ ec0->next = ec1;
+ *headp = ec0;
+ }
+}
+
+
+/* Find the given RCEC in the tree, and return a pointer to it. Or,
+ if not present, add the given one to the tree (by making a copy of
+ it, so the caller can immediately deallocate the original) and
+ return a pointer to the copy. The caller can safely have 'example'
+ on its stack, since we will always return a pointer to a copy of
+ it, not to the original. Note that the inserted node will have .rc
+   of zero and so the caller must immediately increment it. */
+__attribute__((noinline))
+static RCEC* ctxt__find_or_add ( RCEC* example )
+{
+ UWord hent;
+ RCEC* copy;
+ tl_assert(example && example->magic == RCEC_MAGIC);
+ tl_assert(example->rc == 0);
+
+ /* Search the hash table to see if we already have it. */
+ stats__ctxt_tab_qs++;
+ hent = example->frames[0] % N_RCEC_TAB;
+ copy = contextTab[hent];
+ while (1) {
+ if (!copy) break;
+ tl_assert(copy->magic == RCEC_MAGIC);
+ stats__ctxt_tab_cmps++;
+ if (0 == RCEC__cmp_by_frames(copy, example)) break;
+ copy = copy->next;
+ }
+
+ if (copy) {
+ tl_assert(copy != example);
+ /* optimisation: if it's not at the head of its list, move 1
+ step fwds, to make future searches cheaper */
+ if (copy != contextTab[hent]) {
+ move_RCEC_one_step_forward( &contextTab[hent], copy );
+ }
+ } else {
+ copy = alloc_RCEC();
+ tl_assert(copy != example);
+ *copy = *example;
+ copy->next = contextTab[hent];
+ contextTab[hent] = copy;
+ stats__ctxt_tab_curr++;
+ if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
+ stats__ctxt_tab_max = stats__ctxt_tab_curr;
+ }
+ return copy;
+}
+
+static inline UWord ROLW ( UWord w, Int n )
+{
+ Int bpw = 8 * sizeof(UWord);
+ w = (w << n) | (w >> (bpw-n));
+ return w;
+}
+
+__attribute__((noinline))
+static RCEC* get_RCEC ( Thr* thr )
+{
+ UWord hash, i;
+ RCEC example;
+ example.magic = RCEC_MAGIC;
+ example.rc = 0;
+ example.rcX = 0;
+ main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
+ hash = 0;
+ for (i = 1; i < 1 + N_FRAMES; i++) {
+ hash ^= example.frames[i];
+ hash = ROLW(hash, 19);
+ }
+ example.frames[0] = hash;
+ return ctxt__find_or_add( &example );
+}
+
+///////////////////////////////////////////////////////
+//// Part (2):
+/// A SparseWA guest-addr -> OldRef, that refers to (1)
+///
+
+// (UInt) `echo "Old Reference Information" | md5sum`
+#define OldRef_MAGIC 0x30b1f075UL
+
+/* Records an access: a thread and a context.  The size
+   (1,2,4,8) and read-or-writeness are also encoded, as
+   follows: the bottom bit of .thr is 1 if write, 0 if read;
+            the bottom 2 bits of .rcec encode the size:
+            00 = 1, 01 = 2, 10 = 4, 11 = 8
+*/
+typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
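+
+/* Example of that tagging (illustrative only): a 4-byte write by
+   thread 't' with stack context 'c' is stored as
+      .thr  = (Thr*) ((UWord)t | 1)    // low bit set: a write
+      .rcec = (RCEC*)((UWord)c | 2)    // low bits 10: size 4
+   and is decoded again with ptr_and_UWord(.., ~3) plus the low bits,
+   as done in event_map_bind and libhb_event_map_lookup below. */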
+
+#define N_OLDREF_ACCS 5
+
+typedef
+ struct {
+ UWord magic; /* sanity check only */
+ UWord gen; /* when most recently accessed */
+ /* or free list when not in use */
+ /* unused slots in this array have .thr == NULL */
+ Thr_n_RCEC accs[N_OLDREF_ACCS];
+ }
+ OldRef;
+
+
+//////////// BEGIN OldRef group allocator
+static GroupAlloc oldref_group_allocator;
+
+static OldRef* alloc_OldRef ( void ) {
+ return gal_Alloc ( &oldref_group_allocator );
+}
+
+static void free_OldRef ( OldRef* r ) {
+ tl_assert(r->magic == OldRef_MAGIC);
+ gal_Free( &oldref_group_allocator, r );
+}
+//////////// END OldRef group allocator
+
+
+static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
+static UWord oldrefGen = 0; /* current LRU generation # */
+static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
+static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
+
+inline static void* ptr_or_UWord ( void* p, UWord w ) {
+ return (void*)( ((UWord)p) | ((UWord)w) );
+}
+inline static void* ptr_and_UWord ( void* p, UWord w ) {
+ return (void*)( ((UWord)p) & ((UWord)w) );
+}
+
+inline static UInt min_UInt ( UInt a, UInt b ) {
+ return a < b ? a : b;
+}
+
+/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
+ first interval is lower, 1 if the first interval is higher, and 0
+   if there is any overlap.  The redundant casting paranoia follows
+   what looked distinctly like a bug in gcc-4.1.2, in which some of
+   the comparisons were done signedly instead of unsignedly. */
+/* Copied from exp-ptrcheck/sg_main.c */
+static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
+ Addr a2, SizeT n2 ) {
+ UWord a1w = (UWord)a1;
+ UWord n1w = (UWord)n1;
+ UWord a2w = (UWord)a2;
+ UWord n2w = (UWord)n2;
+ tl_assert(n1w > 0 && n2w > 0);
+ if (a1w + n1w <= a2w) return -1L;
+ if (a2w + n2w <= a1w) return 1L;
+ return 0;
+}
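+
+/* For example: cmp_nonempty_intervals(0x1000,4, 0x1004,4) == -1, the
+   two byte ranges being disjoint with the first one lower, whereas
+   cmp_nonempty_intervals(0x1000,4, 0x1002,8) == 0 because the ranges
+   [0x1000,0x1004) and [0x1002,0x100A) overlap. */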
+
+static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
+{
+ OldRef* ref;
+ RCEC* rcec;
+ Word i, j;
+ UWord keyW, valW;
+ Bool b;
+
+ rcec = get_RCEC( thr );
+ ctxt__rcinc(rcec);
+
+   /* encode the writeness of the transaction in the bottom bit of
+      thr, and its size in the bottom two bits of rcec. */
+ thr = ptr_or_UWord(thr, isW ? 1 : 0);
+ switch (szB) {
+ /* This doesn't look particularly branch-predictor friendly. */
+ case 1: rcec = ptr_or_UWord(rcec, 0); break;
+ case 2: rcec = ptr_or_UWord(rcec, 1); break;
+ case 4: rcec = ptr_or_UWord(rcec, 2); break;
+ case 8: rcec = ptr_or_UWord(rcec, 3); break;
+ default: tl_assert(0);
+ }
+
+ /* Look in the map to see if we already have this. */
+ b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
+
+ if (b) {
+
+ /* We already have a record for this address. We now need to
+ see if we have a stack trace pertaining to this (thread, R/W,
+ size) triple. */
+ tl_assert(keyW == a);
+ ref = (OldRef*)valW;
+ tl_assert(ref->magic == OldRef_MAGIC);
+
+ tl_assert(thr);
+ for (i = 0; i < N_OLDREF_ACCS; i++) {
+ if (ref->accs[i].thr != thr)
+ continue;
+ /* since .thr encodes both the accessing thread and the
+ read/writeness, we know now that at least those features
+ of the access match this entry. So we just need to check
+ the size indication. Do this by inspecting the lowest 2 bits of
+ .rcec, which contain the encoded size info. */
+ if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
+ continue;
+ /* else we have a match, so stop looking. */
+ break;
+ }
+
+ if (i < N_OLDREF_ACCS) {
+ /* thread 'thr' has an entry at index 'i'. Update it. */
+ if (i > 0) {
+ Thr_n_RCEC tmp = ref->accs[i-1];
+ ref->accs[i-1] = ref->accs[i];
+ ref->accs[i] = tmp;
+ i--;
+ }
+ if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
+ stats__ctxt_rcdec1++;
+ ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
+ ref->accs[i].rcec = rcec;
+ tl_assert(ref->accs[i].thr == thr);
+ } else {
+ /* No entry for this (thread, R/W, size) triple. Shuffle all
+ of them down one slot, and put the new entry at the start
+ of the array. */
+ if (ref->accs[N_OLDREF_ACCS-1].thr) {
+ /* the last slot is in use. We must dec the rc on the
+ associated rcec. */
+ tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
+ stats__ctxt_rcdec2++;
+ if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
+ VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
+ ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
+ } else {
+ tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
+ }
+ for (j = N_OLDREF_ACCS-1; j >= 1; j--)
+ ref->accs[j] = ref->accs[j-1];
+ ref->accs[0].thr = thr;
+ ref->accs[0].rcec = rcec;
+ /* thr==NULL is used to signify an empty slot, so we can't
+ add a NULL thr. */
+ tl_assert(ptr_and_UWord(thr, ~3) != 0);
+ }
+
+ ref->gen = oldrefGen;
+
+ } else {
+
+ /* We don't have a record for this address. Create a new one. */
+ if (oldrefTreeN >= oldrefGenIncAt) {
+ oldrefGen++;
+ oldrefGenIncAt = oldrefTreeN + 50000;
+ if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
+ oldrefGen, oldrefTreeN );
+ }
+
+ ref = alloc_OldRef();
+ ref->magic = OldRef_MAGIC;
+ ref->gen = oldrefGen;
+ ref->accs[0].rcec = rcec;
+ ref->accs[0].thr = thr;
+ /* thr==NULL is used to signify an empty slot, so we can't add a
+ NULL thr. */
+ tl_assert(ptr_and_UWord(thr, ~3) != 0);
+ for (j = 1; j < N_OLDREF_ACCS; j++) {
+ ref->accs[j].thr = NULL;
+ ref->accs[j].rcec = NULL;
+ }
+ VG_(addToSWA)( oldrefTree, a, (UWord)ref );
+ oldrefTreeN++;
+
+ }
+}
+
+
+Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
+ /*OUT*/Thr** resThr,
+ /*OUT*/SizeT* resSzB,
+ /*OUT*/Bool* resIsW,
+ Thr* thr, Addr a, SizeT szB, Bool isW )
+{
+ Word i, j;
+ OldRef* ref;
+ UWord keyW, valW;
+ Bool b;
+
+ Thr* cand_thr;
+ RCEC* cand_rcec;
+ Bool cand_isW;
+ SizeT cand_szB;
+ Addr cand_a;
+
+ Addr toCheck[15];
+ Int nToCheck = 0;
+
+ tl_assert(thr);
+ tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
+
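+   /* Collect every address whose recorded access (which is at most 8
+      bytes long) could overlap [a, a+szB): that is a-7 .. a+szB-1,
+      at most 7 + 8 == 15 addresses, hence the size of toCheck[]. */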
+ toCheck[nToCheck++] = a;
+ for (i = -7; i < (Word)szB; i++) {
+ if (i != 0)
+ toCheck[nToCheck++] = a + i;
+ }
+ tl_assert(nToCheck <= 15);
+
+ /* Now see if we can find a suitable matching event for
+ any of the addresses in toCheck[0 .. nToCheck-1]. */
+ for (j = 0; j < nToCheck; j++) {
+
+ cand_a = toCheck[j];
+ // VG_(printf)("test %ld %p\n", j, cand_a);
+
+ b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
+ if (!b)
+ continue;
+
+ ref = (OldRef*)valW;
+ tl_assert(keyW == cand_a);
+ tl_assert(ref->magic == OldRef_MAGIC);
+ tl_assert(ref->accs[0].thr); /* first slot must always be used */
+
+ cand_thr = NULL;
+ cand_rcec = NULL;
+ cand_isW = False;
+ cand_szB = 0;
+
+ for (i = 0; i < N_OLDREF_ACCS; i++) {
+ Thr_n_RCEC* cand = &ref->accs[i];
+ cand_thr = ptr_and_UWord(cand->thr, ~3);
+ cand_rcec = ptr_and_UWord(cand->rcec, ~3);
+ /* Decode the writeness from the bottom bit of .thr. */
+ cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
+ /* Decode the size from the bottom two bits of .rcec. */
+ switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
+ case 0: cand_szB = 1; break;
+ case 1: cand_szB = 2; break;
+ case 2: cand_szB = 4; break;
+ case 3: cand_szB = 8; break;
+ default: tl_assert(0);
+ }
+
+ if (cand_thr == NULL)
+ /* This slot isn't in use. Ignore it. */
+ continue;
+
+ if (cand_thr == thr)
+ /* This is an access by the same thread, but we're only
+ interested in accesses from other threads. Ignore. */
+ continue;
+
+ if ((!cand_isW) && (!isW))
+ /* We don't want to report a read racing against another
+ read; that's stupid. So in this case move on. */
+ continue;
+
+ if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
+ /* No overlap with the access we're asking about. Ignore. */
+ continue;
+
+ /* We have a match. Stop searching. */
+ break;
+ }
+
+ tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
+
+ if (i < N_OLDREF_ACCS) {
+ /* return with success */
+ tl_assert(cand_thr);
+ tl_assert(cand_rcec);
+ tl_assert(cand_rcec->magic == RCEC_MAGIC);
+ tl_assert(cand_szB >= 1);
+ *resEC = VG_(make_ExeContext_from_StackTrace)(
+ &cand_rcec->frames[1],
+ min_UInt(N_FRAMES, VG_(clo_backtrace_size))
+ );
+ *resThr = cand_thr;
+ *resSzB = cand_szB;
+ *resIsW = cand_isW;
+ return True;
+ }
+
+ /* consider next address in toCheck[] */
+ } /* for (j = 0; j < nToCheck; j++) */
+
+ /* really didn't find anything. */
+ return False;
+}
+
+static void event_map_init ( void )
+{
+ Word i;
+
+ /* Context (RCEC) group allocator */
+ init_GroupAlloc ( &rcec_group_allocator,
+ sizeof(RCEC),
+ 1000 /* RCECs per group */,
+ HG_(zalloc),
+ "libhb.event_map_init.1 (RCEC groups)",
+ HG_(free) );
+
+ /* Context table */
+ tl_assert(!contextTab);
+ contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
+ N_RCEC_TAB * sizeof(RCEC*) );
+ tl_assert(contextTab);
+ for (i = 0; i < N_RCEC_TAB; i++)
+ contextTab[i] = NULL;
+
+ /* Oldref group allocator */
+ init_GroupAlloc ( &oldref_group_allocator,
+ sizeof(OldRef),
+ 1000 /* OldRefs per group */,
+ HG_(zalloc),
+ "libhb.event_map_init.3 (OldRef groups)",
+ HG_(free) );
+
+ /* Oldref tree */
+ tl_assert(!oldrefTree);
+ oldrefTree = VG_(newSWA)(
+ HG_(zalloc),
+ "libhb.event_map_init.4 (oldref tree)",
+ HG_(free)
+ );
+ tl_assert(oldrefTree);
+
+ oldrefGen = 0;
+ oldrefGenIncAt = 0;
+ oldrefTreeN = 0;
+}
+
+static void event_map__check_reference_counts ( Bool before )
+{
+ RCEC* rcec;
+ OldRef* oldref;
+ Word i;
+ UWord nEnts = 0;
+ UWord keyW, valW;
+
+ /* Set the 'check' reference counts to zero. Also, optionally
+ check that the real reference counts are non-zero. We allow
+ these to fall to zero before a GC, but the GC must get rid of
+ all those that are zero, hence none should be zero after a
+ GC. */
+ for (i = 0; i < N_RCEC_TAB; i++) {
+ for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
+ nEnts++;
+ tl_assert(rcec);
+ tl_assert(rcec->magic == RCEC_MAGIC);
+ if (!before)
+ tl_assert(rcec->rc > 0);
+ rcec->rcX = 0;
+ }
+ }
+
+ /* check that the stats are sane */
+ tl_assert(nEnts == stats__ctxt_tab_curr);
+ tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
+
+   /* visit all the referencing points, incrementing the check ref counts */
+ VG_(initIterSWA)( oldrefTree );
+ while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
+ oldref = (OldRef*)valW;
+ tl_assert(oldref->magic == OldRef_MAGIC);
+ for (i = 0; i < N_OLDREF_ACCS; i++) {
+ Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
+ RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
+ if (aThr) {
+ tl_assert(aRef);
+ tl_assert(aRef->magic == RCEC_MAGIC);
+ aRef->rcX++;
+ } else {
+ tl_assert(!aRef);
+ }
+ }
+ }
+
+ /* compare check ref counts with actual */
+ for (i = 0; i < N_RCEC_TAB; i++) {
+ for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
+ tl_assert(rcec->rc == rcec->rcX);
+ }
+ }
+}
+
+__attribute__((noinline))
+static void event_map_maybe_GC ( void )
+{
+ OldRef* oldref;
+ UWord keyW, valW, retained, maxGen;
+ XArray* refs2del;
+ Word i, j, n2del;
+
+ UWord* genMap = NULL;
+ UWord genMap_min = 0;
+ UWord genMap_size = 0;
+
+ if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
+ return;
+
+ if (0)
+ VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
+
+ /* Check for sane command line params. Limit values must match
+ those in hg_process_cmd_line_option. */
+ tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
+ tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
+
+ /* Check our counting is sane (expensive) */
+ if (CHECK_CEM)
+ tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
+
+ /* Check the reference counts (expensive) */
+ if (CHECK_CEM)
+ event_map__check_reference_counts( True/*before*/ );
+
+ /* Compute the distribution of generation values in the ref tree.
+ There are likely only to be a few different generation numbers
+ in the whole tree, but we don't know what they are. Hence use a
+ dynamically resized array of counters. The array is genMap[0
+ .. genMap_size-1], where genMap[0] is the count for the
+ generation number genMap_min, genMap[1] is the count for
+ genMap_min+1, etc. If a new number is seen outside the range
+ [genMap_min .. genMap_min + genMap_size - 1] then the array is
+ copied into a larger array, and genMap_min and genMap_size are
+ adjusted accordingly. */
+
+ /* genMap :: generation-number -> count-of-nodes-with-that-number */
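+   /* For instance (illustrative only): if the first two generations
+      encountered are 7 and then 9, the map ends up with genMap_min
+      == 7 and genMap_size == 3, with genMap[0] and genMap[2] holding
+      the counts for generations 7 and 9 and genMap[1] left at 0. */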
+
+ VG_(initIterSWA)( oldrefTree );
+ while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
+
+ UWord ea, key;
+ oldref = (OldRef*)valW;
+ key = oldref->gen;
+
+ /* BEGIN find 'ea', which is the index in genMap holding the
+ count for generation number 'key'. */
+ if (UNLIKELY(genMap == NULL)) {
+ /* deal with the first key to be seen, so that the following
+ cases don't need to handle the complexity of a NULL count
+ array. */
+ genMap_min = key;
+ genMap_size = 1;
+ genMap = HG_(zalloc)( "libhb.emmG.1a",
+ genMap_size * sizeof(UWord) );
+ ea = 0;
+ if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
+ key, genMap_min, genMap_min+genMap_size- 1 );
+ }
+ else
+ if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
+ /* this is the expected (almost-always-happens) case: 'key'
+ is already mapped in the array. */
+ ea = key - genMap_min;
+ }
+ else
+ if (key < genMap_min) {
+ /* 'key' appears before the start of the current array.
+ Extend the current array by allocating a larger one and
+ copying the current one to the upper end of it. */
+ Word more;
+ UWord* map2;
+ more = genMap_min - key;
+ tl_assert(more > 0);
+ map2 = HG_(zalloc)( "libhb.emmG.1b",
+ (genMap_size + more) * sizeof(UWord) );
+ VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
+ HG_(free)( genMap );
+ genMap = map2;
+ genMap_size += more;
+ genMap_min -= more;
+ ea = 0;
+ tl_assert(genMap_min == key);
+ if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
+ key, genMap_min, genMap_min+genMap_size- 1 );
+ }
+ else {
+ /* 'key' appears after the end of the current array. Extend
+ the current array by allocating a larger one and copying
+ the current one to the lower end of it. */
+ Word more;
+ UWord* map2;
+ tl_assert(key >= genMap_min + genMap_size);
+ more = key - (genMap_min + genMap_size) + 1;
+ tl_assert(more > 0);
+ map2 = HG_(zalloc)( "libhb.emmG.1c",
+ (genMap_size + more) * sizeof(UWord) );
+ VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
+ HG_(free)( genMap );
+ genMap = map2;
+ genMap_size += more;
+         ea = genMap_size - 1;
+ tl_assert(genMap_min + genMap_size - 1 == key);
+ if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
+ key, genMap_min, genMap_min+genMap_size- 1 );
+ }
+ /* END find 'ea' from 'key' */
+
+ tl_assert(ea >= 0 && ea < genMap_size);
+ /* and the whole point of this elaborate computation of 'ea' is .. */
+ genMap[ea]++;
+ }
+
+ tl_assert(genMap);
+ tl_assert(genMap_size > 0);
+
+ /* Sanity check what we just computed */
+ { UWord sum = 0;
+ for (i = 0; i < genMap_size; i++) {
+ if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
+ i + genMap_min, genMap[i] );
+ sum += genMap[i];
+ }
+ tl_assert(sum == oldrefTreeN);
+ }
+
+ /* Figure out how many generations to throw away */
+ retained = oldrefTreeN;
+ maxGen = 0;
+
+ for (i = 0; i < genMap_size; i++) {
+ keyW = i + genMap_min;
+ valW = genMap[i];
+ tl_assert(keyW > 0); /* can't allow a generation # 0 */
+ if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
+ tl_assert(keyW >= maxGen);
+ tl_assert(retained >= valW);
+ if (retained - valW
+ > (UWord)(HG_(clo_conflict_cache_size)
+ * EVENT_MAP_GC_DISCARD_FRACTION)) {
+ retained -= valW;
+ maxGen = keyW;
+ } else {
+ break;
+ }
+ }
+
+ HG_(free)(genMap);
+
+ tl_assert(retained >= 0 && retained <= oldrefTreeN);
+
+ /* Now make up a big list of the oldrefTree entries we want to
+ delete. We can't simultaneously traverse the tree and delete
+ stuff from it, so first we need to copy them off somewhere
+ else. (sigh) */
+ refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
+ HG_(free), sizeof(Addr) );
+
+ if (retained < oldrefTreeN) {
+
+ /* This is the normal (expected) case. We discard any ref whose
+ generation number <= maxGen. */
+ VG_(initIterSWA)( oldrefTree );
+ while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
+ oldref = (OldRef*)valW;
+ tl_assert(oldref->magic == OldRef_MAGIC);
+ if (oldref->gen <= maxGen) {
+ VG_(addToXA)( refs2del, &keyW );
+ }
+ }
+ if (VG_(clo_verbosity) > 1) {
+ VG_(message)(Vg_DebugMsg,
+ "libhb: EvM GC: delete generations %lu and below, "
+ "retaining %lu entries",
+ maxGen, retained );
+ }
+
+ } else {
+
+ static UInt rand_seed = 0; /* leave as static */
+
+ /* Degenerate case: there's only one generation in the entire
+ tree, so we need to have some other way of deciding which
+ refs to throw away. Just throw out half of them randomly. */
+ tl_assert(retained == oldrefTreeN);
+ VG_(initIterSWA)( oldrefTree );
+ while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
+ UInt n;
+ oldref = (OldRef*)valW;
+ tl_assert(oldref->magic == OldRef_MAGIC);
+ n = VG_(random)( &rand_seed );
+ if ((n & 0xFFF) < 0x800) {
+ VG_(addToXA)( refs2del, &keyW );
+ retained--;
+ }
+ }
+ if (VG_(clo_verbosity) > 1) {
+ VG_(message)(Vg_DebugMsg,
+ "libhb: EvM GC: randomly delete half the entries, "
+ "retaining %lu entries",
+ retained );
+ }
+
+ }
+
+ n2del = VG_(sizeXA)( refs2del );
+ tl_assert(n2del == (Word)(oldrefTreeN - retained));
+
+ if (0) VG_(printf)("%s","deleting entries\n");
+ for (i = 0; i < n2del; i++) {
+ Bool b;
+ Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
+ b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
+ tl_assert(b);
+ tl_assert(keyW == ga2del);
+ oldref = (OldRef*)valW;
+ for (j = 0; j < N_OLDREF_ACCS; j++) {
+ Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
+ RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
+ if (aRef) {
+ tl_assert(aThr);
+ stats__ctxt_rcdec3++;
+ ctxt__rcdec( aRef );
+ } else {
+ tl_assert(!aThr);
+ }
+ }
+
+ free_OldRef( oldref );
+ }
+
+ VG_(deleteXA)( refs2del );
+
+ tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
+
+ oldrefTreeN = retained;
+ oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
+
+ /* Throw away all RCECs with zero reference counts */
+ for (i = 0; i < N_RCEC_TAB; i++) {
+ RCEC** pp = &contextTab[i];
+ RCEC* p = *pp;
+ while (p) {
+ if (p->rc == 0) {
+ *pp = p->next;
+ free_RCEC(p);
+ p = *pp;
+ tl_assert(stats__ctxt_tab_curr > 0);
+ stats__ctxt_tab_curr--;
+ } else {
+ pp = &p->next;
+ p = p->next;
+ }
+ }
+ }
+
+ /* Check the reference counts (expensive) */
+ if (CHECK_CEM)
+ event_map__check_reference_counts( False/*after*/ );
+
+ //if (0)
+ //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
+ // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
+
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Core MSM //
+// //
+/////////////////////////////////////////////////////////
+
+/* Logic in msm_read/msm_write updated/verified after re-analysis,
+ 19 Nov 08. */
+
+/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
+ nonzero, the effect is that when a race is detected for a location,
+ that location is put into a special 'error' state and no further
+ checking of it is done until it returns to a 'normal' state, which
+ requires it to be deallocated and reallocated.
+
+ This is a bad idea, because of the interaction with suppressions.
+ Suppose there is a race on the location, but the error is
+ suppressed. The location now is marked as in-error. Now any
+ subsequent race -- including ones we want to see -- will never be
+ detected until the location is deallocated and reallocated.
+
+ Hence set MSM_RACE2ERR to zero. This causes raced-on locations to
+ remain in the normal 'C' (constrained) state, but places on them
+ the constraint that the next accesses happen-after both the
+ existing constraint and the relevant vector clock of the thread
+ doing the racing access.
+*/
+#define MSM_RACE2ERR 0
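+
+/* Summary of the resulting transitions for a C(Rmin,Wmin) location,
+   with MSM_RACE2ERR == 0 (this just restates the code in msm_read and
+   msm_write below):
+     read  by thread T, no race:  C( Rmin,             Wmin join T.viW )
+     write by thread T, no race:  C( T.viW,            T.viW           )
+     raced-on (either kind):      C( Rmin join T.viR,  Wmin join T.viW )
+   where 'join' is VtsID__join2. */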
+
+static ULong stats__msm_read = 0;
+static ULong stats__msm_read_change = 0;
+static ULong stats__msm_write = 0;
+static ULong stats__msm_write_change = 0;
+
+__attribute__((noinline))
+static void record_race_info ( Thr* acc_thr,
+ Addr acc_addr, SizeT szB, Bool isWrite )
+{
+ /* Call here to report a race. We just hand it onwards to
+ HG_(record_error_Race). If that in turn discovers that the
+ error is going to be collected, then that queries the
+ conflicting-event map. The alternative would be to query it
+ right here. But that causes a lot of pointless queries for
+ errors which will shortly be discarded as duplicates, and can
+ become a performance overhead; so we defer the query until we
+ know the error is not a duplicate. */
+ tl_assert(acc_thr->opaque);
+ HG_(record_error_Race)( acc_thr->opaque, acc_addr,
+ szB, isWrite, NULL/*mb_lastlock*/ );
+}
+
+static Bool is_sane_SVal_C ( SVal sv ) {
+ POrd ord;
+ if (!SVal__isC(sv)) return True;
+ ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
+ if (ord == POrd_EQ || ord == POrd_LT) return True;
+ return False;
+}
+
+
+/* Compute new state following a read */
+static inline SVal msm_read ( SVal svOld,
+ /* The following are only needed for
+ creating error reports. */
+ Thr* acc_thr,
+ Addr acc_addr, SizeT szB )
+{
+ SVal svNew = SVal_INVALID;
+ stats__msm_read++;
+
+ /* Redundant sanity check on the constraints */
+ if (CHECK_MSM) {
+ tl_assert(is_sane_SVal_C(svOld));
+ }
+
+ if (SVal__isC(svOld)) {
+ POrd ord;
+ VtsID tviR = acc_thr->viR;
+ VtsID tviW = acc_thr->viW;
+ VtsID rmini = SVal__unC_Rmin(svOld);
+ VtsID wmini = SVal__unC_Wmin(svOld);
+
+ ord = VtsID__getOrdering(rmini,tviR);
+ if (ord == POrd_EQ || ord == POrd_LT) {
+ /* no race */
+ /* Note: RWLOCK subtlety: use tviW, not tviR */
+ svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
+ goto out;
+ } else {
+ /* assert on sanity of constraints. */
+ POrd ordxx = VtsID__getOrdering(rmini,wmini);
+ tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
+ svNew = MSM_RACE2ERR
+ ? SVal__mkE()
+ /* see comments on corresponding fragment in
+ msm_write for explanation. */
+ /* aggressive setting: */
+ /*
+ : SVal__mkC( VtsID__join2(wmini,tviR),
+ VtsID__join2(wmini,tviW) );
+ */
+ /* "consistent" setting: */
+ : SVal__mkC( VtsID__join2(rmini,tviR),
+ VtsID__join2(wmini,tviW) );
+ record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ );
+ goto out;
+ }
+ }
+ if (SVal__isA(svOld)) {
+ /* reading no-access memory (sigh); leave unchanged */
+ /* check for no pollution */
+ tl_assert(svOld == SVal_NOACCESS);
+ svNew = SVal_NOACCESS;
+ goto out;
+ }
+ if (SVal__isE(svOld)) {
+ /* no race, location is already "in error" */
+ svNew = SVal__mkE();
+ goto out;
+ }
+ VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
+ tl_assert(0);
+
+ out:
+ if (CHECK_MSM) {
+ tl_assert(is_sane_SVal_C(svNew));
+ }
+ tl_assert(svNew != SVal_INVALID);
+ if (svNew != svOld && HG_(clo_show_conflicts)) {
+ if (SVal__isC(svOld) && SVal__isC(svNew)) {
+ event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
+ stats__msm_read_change++;
+ }
+ }
+ return svNew;
+}
+
+
+/* Compute new state following a write */
+static inline SVal msm_write ( SVal svOld,
+ /* The following are only needed for
+ creating error reports. */
+ Thr* acc_thr,
+ Addr acc_addr, SizeT szB )
+{
+ SVal svNew = SVal_INVALID;
+ stats__msm_write++;
+
+ /* Redundant sanity check on the constraints */
+ if (CHECK_MSM) {
+ tl_assert(is_sane_SVal_C(svOld));
+ }
+
+ if (SVal__isC(svOld)) {
+ POrd ord;
+ VtsID tviW = acc_thr->viW;
+ VtsID wmini = SVal__unC_Wmin(svOld);
+
+ ord = VtsID__getOrdering(wmini,tviW);
+ if (ord == POrd_EQ || ord == POrd_LT) {
+ /* no race */
+ svNew = SVal__mkC( tviW, tviW );
+ goto out;
+ } else {
+ VtsID tviR = acc_thr->viR;
+ VtsID rmini = SVal__unC_Rmin(svOld);
+ /* assert on sanity of constraints. */
+ POrd ordxx = VtsID__getOrdering(rmini,wmini);
+ tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
+ svNew = MSM_RACE2ERR
+ ? SVal__mkE()
+ /* One possibility is, after a race is seen, to
+ set the location's constraints as aggressively
+ (as far ahead) as possible. However, that just
+ causes lots more races to be reported, which is
+ very confusing. Hence don't do this. */
+ /*
+ : SVal__mkC( VtsID__join2(wmini,tviR),
+ VtsID__join2(wmini,tviW) );
+ */
+ /* instead, re-set the constraints in a way which
+ is consistent with (ie, as they would have been
+ computed anyway) had no race been detected. */
+ : SVal__mkC( VtsID__join2(rmini,tviR),
+ VtsID__join2(wmini,tviW) );
+ record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ );
+ goto out;
+ }
+ }
+ if (SVal__isA(svOld)) {
+ /* writing no-access memory (sigh); leave unchanged */
+ /* check for no pollution */
+ tl_assert(svOld == SVal_NOACCESS);
+ svNew = SVal_NOACCESS;
+ goto out;
+ }
+ if (SVal__isE(svOld)) {
+ /* no race, location is already "in error" */
+ svNew = SVal__mkE();
+ goto out;
+ }
+ VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
+ tl_assert(0);
+
+ out:
+ if (CHECK_MSM) {
+ tl_assert(is_sane_SVal_C(svNew));
+ }
+ tl_assert(svNew != SVal_INVALID);
+ if (svNew != svOld && HG_(clo_show_conflicts)) {
+ if (SVal__isC(svOld) && SVal__isC(svNew)) {
+ event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
+ stats__msm_write_change++;
+ }
+ }
+ return svNew;
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Apply core MSM to specific memory locations //
+// //
+/////////////////////////////////////////////////////////
+
+/*------------- ZSM accesses: 8 bit apply ------------- */
+
+void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read8s++;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 .. 7 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,1 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+}
+
+void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read8s++;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 .. 7 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,1 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+}
+
+/*------------- ZSM accesses: 16 bit apply ------------- */
+
+void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read16s++;
+ if (UNLIKELY(!aligned16(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
+ if (valid_value_is_below_me_16(descr, toff)) {
+ goto slowcase;
+ } else {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
+ }
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,2 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_16to8splits++;
+ zsm_apply8___msm_read( thr, a + 0 );
+ zsm_apply8___msm_read( thr, a + 1 );
+}
+
+void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read16s++;
+ if (UNLIKELY(!aligned16(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
+ if (valid_value_is_below_me_16(descr, toff)) {
+ goto slowcase;
+ } else {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
+ }
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,2 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_16to8splits++;
+ zsm_apply8___msm_write( thr, a + 0 );
+ zsm_apply8___msm_write( thr, a + 1 );
+}
+
+/*------------- ZSM accesses: 32 bit apply ------------- */
+
+void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+ if (UNLIKELY(!aligned32(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 or 4 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+ if (valid_value_is_above_me_32(descr, toff)) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+ } else {
+ goto slowcase;
+ }
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,4 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_32to16splits++;
+ zsm_apply16___msm_read( thr, a + 0 );
+ zsm_apply16___msm_read( thr, a + 2 );
+}
+
+void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ SVal svOld, svNew;
+ UShort descr;
+   stats__cline_write32s++;
+   if (UNLIKELY(!aligned32(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 or 4 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+ if (valid_value_is_above_me_32(descr, toff)) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+ } else {
+ goto slowcase;
+ }
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,4 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_32to16splits++;
+ zsm_apply16___msm_write( thr, a + 0 );
+ zsm_apply16___msm_write( thr, a + 2 );
+}
+
+/*------------- ZSM accesses: 64 bit apply ------------- */
+
+void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno;
+ //UWord toff;
+ SVal svOld, svNew;
+ UShort descr;
+ stats__cline_read64s++;
+ if (UNLIKELY(!aligned64(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ //toff = get_tree_offset(a); /* == 0, unused */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
+ goto slowcase;
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_read( svOld, thr,a,8 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_64to32splits++;
+ zsm_apply32___msm_read( thr, a + 0 );
+ zsm_apply32___msm_read( thr, a + 4 );
+}
+
+void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno;
+ //UWord toff;
+ SVal svOld, svNew;
+ UShort descr;
+   stats__cline_write64s++;
+ if (UNLIKELY(!aligned64(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ //toff = get_tree_offset(a); /* == 0, unused */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
+ goto slowcase;
+ }
+ svOld = cl->svals[cloff];
+ svNew = msm_write( svOld, thr,a,8 );
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+ return;
+ slowcase: /* misaligned, or must go further down the tree */
+ stats__cline_64to32splits++;
+ zsm_apply32___msm_write( thr, a + 0 );
+ zsm_apply32___msm_write( thr, a + 4 );
+}
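+
+/* A minimal worked example of the apply-path descriptor handling
+   above (illustrative only; it assumes the usual tree layout implied
+   by the TREE_DESCR_* names: per 8-byte tree, one 64-bit summary,
+   two 32-bit, four 16-bit and eight 8-bit positions):
+
+   - a 16-bit read at toff == 2 of a tree currently summarised as one
+     64-bit value: the valid value is above us, so pulldown_to_16
+     pulls it down to this level, rewriting cl->descrs[tno] to match,
+     after which msm_read is applied to the 16-bit position's SVal;
+
+   - the same 16-bit read of a tree already split into 8-bit leaves:
+     the valid values are below us, so the access cannot be served at
+     this level and the slowcase splits it into two 8-bit applies. */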
+
+/*--------------- ZSM accesses: 8 bit write --------------- */
+
+static
+void zsm_write8 ( Addr a, SVal svNew ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ UShort descr;
+ stats__cline_set8s++;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 .. 7 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff] = svNew;
+}
+
+/*--------------- ZSM accesses: 16 bit write --------------- */
+
+static
+void zsm_write16 ( Addr a, SVal svNew ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ UShort descr;
+ stats__cline_set16s++;
+ if (UNLIKELY(!aligned16(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
+ if (valid_value_is_below_me_16(descr, toff)) {
+ /* Writing at this level. Need to fix up 'descr'. */
+ cl->descrs[tno] = pullup_descr_to_16(descr, toff);
+         /* At this point, the tree does not match cl->descrs[tno] any
+            more. The assignments below will fix it up. */
+ } else {
+ /* We can't indiscriminately write on the w16 node as in the
+ w64 case, as that might make the node inconsistent with
+ its parent. So first, pull down to this level. */
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ }
+ }
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff + 0] = svNew;
+ cl->svals[cloff + 1] = SVal_INVALID;
+ return;
+ slowcase: /* misaligned */
+ stats__cline_16to8splits++;
+ zsm_write8( a + 0, svNew );
+ zsm_write8( a + 1, svNew );
+}
+
+/*--------------- ZSM accesses: 32 bit write --------------- */
+
+static
+void zsm_write32 ( Addr a, SVal svNew ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ UShort descr;
+ stats__cline_set32s++;
+ if (UNLIKELY(!aligned32(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 or 4 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
+ if (valid_value_is_above_me_32(descr, toff)) {
+ /* We can't indiscriminately write on the w32 node as in the
+ w64 case, as that might make the node inconsistent with
+ its parent. So first, pull down to this level. */
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
+ if (CHECK_ZSM)
+ tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
+ } else {
+ /* Writing at this level. Need to fix up 'descr'. */
+ cl->descrs[tno] = pullup_descr_to_32(descr, toff);
+         /* At this point, the tree does not match cl->descrs[tno] any
+            more. The assignments below will fix it up. */
+ }
+ }
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff + 0] = svNew;
+ cl->svals[cloff + 1] = SVal_INVALID;
+ cl->svals[cloff + 2] = SVal_INVALID;
+ cl->svals[cloff + 3] = SVal_INVALID;
+ return;
+ slowcase: /* misaligned */
+ stats__cline_32to16splits++;
+ zsm_write16( a + 0, svNew );
+ zsm_write16( a + 2, svNew );
+}
+
+/*--------------- ZSM accesses: 64 bit write --------------- */
+
+static
+void zsm_write64 ( Addr a, SVal svNew ) {
+ CacheLine* cl;
+ UWord cloff, tno;
+ //UWord toff;
+ stats__cline_set64s++;
+ if (UNLIKELY(!aligned64(a))) goto slowcase;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ //toff = get_tree_offset(a); /* == 0, unused */
+ cl->descrs[tno] = TREE_DESCR_64;
+ tl_assert(svNew != SVal_INVALID);
+ cl->svals[cloff + 0] = svNew;
+ cl->svals[cloff + 1] = SVal_INVALID;
+ cl->svals[cloff + 2] = SVal_INVALID;
+ cl->svals[cloff + 3] = SVal_INVALID;
+ cl->svals[cloff + 4] = SVal_INVALID;
+ cl->svals[cloff + 5] = SVal_INVALID;
+ cl->svals[cloff + 6] = SVal_INVALID;
+ cl->svals[cloff + 7] = SVal_INVALID;
+ return;
+ slowcase: /* misaligned */
+ stats__cline_64to32splits++;
+ zsm_write32( a + 0, svNew );
+ zsm_write32( a + 4, svNew );
+}
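+
+/* Worked example of the pullup-vs-pulldown choice in the setters
+   above (illustrative only, same assumed tree layout as before):
+
+   - zsm_write32 into a tree currently split into 8-bit leaves: the
+     valid values are below us, so pullup_descr_to_32 simply marks
+     the addressed 32-bit region as one leaf; the code then stores
+     svNew in svals[cloff] and SVal_INVALID in the other three slots;
+
+   - zsm_write32 into a tree currently held as one 64-bit value: the
+     valid value is above us, so we must pulldown_to_32 first, as the
+     comments above explain, so that the untouched half of the 64-bit
+     node keeps a consistent descriptor and value of its own. */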
+
+/*------------- ZSM accesses: 8 bit read/copy ------------- */
+
+static
+SVal zsm_read8 ( Addr a ) {
+ CacheLine* cl;
+ UWord cloff, tno, toff;
+ UShort descr;
+ stats__cline_get8s++;
+ cl = get_cacheline(a);
+ cloff = get_cacheline_offset(a);
+ tno = get_treeno(a);
+ toff = get_tree_offset(a); /* == 0 .. 7 */
+ descr = cl->descrs[tno];
+ if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
+ SVal* tree = &cl->svals[tno << 3];
+ cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
+ }
+ return cl->svals[cloff];
+}
+
+/* Note: the uu_normalise argument is currently unused (hence the
+   "uu_" prefix); zsm_copy_range below computes it, but it has no
+   effect here. */
+static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
+ SVal sv;
+ stats__cline_copy8s++;
+ sv = zsm_read8( src );
+ zsm_write8( dst, sv );
+}
+
+/* ------------ Shadow memory range setting ops ------------ */
+
+void zsm_apply_range___msm_read ( Thr* thr,
+ Addr a, SizeT len )
+{
+ /* fast track a couple of common cases */
+ if (len == 4 && aligned32(a)) {
+ zsm_apply32___msm_read( thr, a );
+ return;
+ }
+ if (len == 8 && aligned64(a)) {
+ zsm_apply64___msm_read( thr, a );
+ return;
+ }
+
+ /* be completely general (but as efficient as possible) */
+ if (len == 0) return;
+
+ if (!aligned16(a) && len >= 1) {
+ zsm_apply8___msm_read( thr, a );
+ a += 1;
+ len -= 1;
+ tl_assert(aligned16(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned32(a) && len >= 2) {
+ zsm_apply16___msm_read( thr, a );
+ a += 2;
+ len -= 2;
+ tl_assert(aligned32(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned64(a) && len >= 4) {
+ zsm_apply32___msm_read( thr, a );
+ a += 4;
+ len -= 4;
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 8) {
+ tl_assert(aligned64(a));
+ while (len >= 8) {
+ zsm_apply64___msm_read( thr, a );
+ a += 8;
+ len -= 8;
+ }
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 4)
+ tl_assert(aligned32(a));
+ if (len >= 4) {
+ zsm_apply32___msm_read( thr, a );
+ a += 4;
+ len -= 4;
+ }
+ if (len == 0) return;
+
+ if (len >= 2)
+ tl_assert(aligned16(a));
+ if (len >= 2) {
+ zsm_apply16___msm_read( thr, a );
+ a += 2;
+ len -= 2;
+ }
+ if (len == 0) return;
+
+ if (len >= 1) {
+ zsm_apply8___msm_read( thr, a );
+ //a += 1;
+ len -= 1;
+ }
+ tl_assert(len == 0);
+}
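+
+/* A worked example of the decomposition above: for a == 0x1003 and
+   len == 13 the code issues
+     an  8-bit apply at 0x1003  (to reach 16-bit alignment),
+     a  32-bit apply at 0x1004  (0x1004 is already 32-bit aligned, so
+                                 the 16-bit step is skipped; this one
+                                 reaches 64-bit alignment),
+     a  64-bit apply at 0x1008,
+   at which point len has dropped to zero.  The _write variant below
+   and zsm_set_range_SMALL further down use the same decomposition. */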
+
+
+
+void zsm_apply_range___msm_write ( Thr* thr,
+ Addr a, SizeT len )
+{
+ /* fast track a couple of common cases */
+ if (len == 4 && aligned32(a)) {
+ zsm_apply32___msm_write( thr, a );
+ return;
+ }
+ if (len == 8 && aligned64(a)) {
+ zsm_apply64___msm_write( thr, a );
+ return;
+ }
+
+ /* be completely general (but as efficient as possible) */
+ if (len == 0) return;
+
+ if (!aligned16(a) && len >= 1) {
+ zsm_apply8___msm_write( thr, a );
+ a += 1;
+ len -= 1;
+ tl_assert(aligned16(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned32(a) && len >= 2) {
+ zsm_apply16___msm_write( thr, a );
+ a += 2;
+ len -= 2;
+ tl_assert(aligned32(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned64(a) && len >= 4) {
+ zsm_apply32___msm_write( thr, a );
+ a += 4;
+ len -= 4;
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 8) {
+ tl_assert(aligned64(a));
+ while (len >= 8) {
+ zsm_apply64___msm_write( thr, a );
+ a += 8;
+ len -= 8;
+ }
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 4)
+ tl_assert(aligned32(a));
+ if (len >= 4) {
+ zsm_apply32___msm_write( thr, a );
+ a += 4;
+ len -= 4;
+ }
+ if (len == 0) return;
+
+ if (len >= 2)
+ tl_assert(aligned16(a));
+ if (len >= 2) {
+ zsm_apply16___msm_write( thr, a );
+ a += 2;
+ len -= 2;
+ }
+ if (len == 0) return;
+
+ if (len >= 1) {
+ zsm_apply8___msm_write( thr, a );
+ //a += 1;
+ len -= 1;
+ }
+ tl_assert(len == 0);
+}
+
+
+
+
+/* Block-copy states (needed for implementing realloc()). */
+
+static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
+{
+ SizeT i;
+ if (len == 0)
+ return;
+
+   /* assert that the source and destination ranges do not overlap */
+ tl_assert(src+len <= dst || dst+len <= src);
+
+ /* To be simple, just copy byte by byte. But so as not to wreck
+ performance for later accesses to dst[0 .. len-1], normalise
+ destination lines as we finish with them, and also normalise the
+ line containing the first and last address. */
+ for (i = 0; i < len; i++) {
+ Bool normalise
+ = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
+ || i == 0 /* first in range */
+ || i == len-1; /* last in range */
+ zsm_copy8( src+i, dst+i, normalise );
+ }
+}
+
+
+/* For setting address ranges to a given value. Has considerable
+ sophistication so as to avoid generating large numbers of pointless
+ cache loads/writebacks for large ranges. */
+
+/* Do small ranges in-cache, in the obvious way. */
+static
+void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
+{
+ /* fast track a couple of common cases */
+ if (len == 4 && aligned32(a)) {
+ zsm_write32( a, svNew );
+ return;
+ }
+ if (len == 8 && aligned64(a)) {
+ zsm_write64( a, svNew );
+ return;
+ }
+
+ /* be completely general (but as efficient as possible) */
+ if (len == 0) return;
+
+ if (!aligned16(a) && len >= 1) {
+ zsm_write8( a, svNew );
+ a += 1;
+ len -= 1;
+ tl_assert(aligned16(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned32(a) && len >= 2) {
+ zsm_write16( a, svNew );
+ a += 2;
+ len -= 2;
+ tl_assert(aligned32(a));
+ }
+ if (len == 0) return;
+
+ if (!aligned64(a) && len >= 4) {
+ zsm_write32( a, svNew );
+ a += 4;
+ len -= 4;
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 8) {
+ tl_assert(aligned64(a));
+ while (len >= 8) {
+ zsm_write64( a, svNew );
+ a += 8;
+ len -= 8;
+ }
+ tl_assert(aligned64(a));
+ }
+ if (len == 0) return;
+
+ if (len >= 4)
+ tl_assert(aligned32(a));
+ if (len >= 4) {
+ zsm_write32( a, svNew );
+ a += 4;
+ len -= 4;
+ }
+ if (len == 0) return;
+
+ if (len >= 2)
+ tl_assert(aligned16(a));
+ if (len >= 2) {
+ zsm_write16( a, svNew );
+ a += 2;
+ len -= 2;
+ }
+ if (len == 0) return;
+
+ if (len >= 1) {
+ zsm_write8( a, svNew );
+ //a += 1;
+ len -= 1;
+ }
+ tl_assert(len == 0);
+}
+
+
+/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
+ for larger ranges, try to operate directly on the out-of-cache
+ representation, rather than dragging lines into the cache,
+ overwriting them, and forcing them out. This turns out to be an
+ important performance optimisation. */
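+
+/* As a concrete illustration (assuming, say, N_LINE_ARANGE == 64):
+   setting a multi-kilobyte range handles the partial line at the
+   front and the partial line at the back via zsm_set_range_SMALL,
+   and then, for each complete line in between:
+     - if the line happens to be in the cache, it is overwritten
+       in-cache with zsm_write64 calls;
+     - otherwise the backing LineZ is rewritten directly: dict[0]
+       becomes svNew, dict[1..3] become SVal_INVALID, and every
+       ix2s[] entry points at dict[0],
+   so untouched lines are never dragged into the cache at all. */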
+
+static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
+{
+ tl_assert(svNew != SVal_INVALID);
+ stats__cache_make_New_arange += (ULong)len;
+
+ if (0 && len > 500)
+ VG_(printf)("make New ( %#lx, %ld )\n", a, len );
+
+ if (0) {
+ static UWord n_New_in_cache = 0;
+ static UWord n_New_not_in_cache = 0;
+ /* tag is 'a' with the in-line offset masked out,
+ eg a[31]..a[4] 0000 */
+ Addr tag = a & ~(N_LINE_ARANGE - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+ if (LIKELY(tag == cache_shmem.tags0[wix])) {
+ n_New_in_cache++;
+ } else {
+ n_New_not_in_cache++;
+ }
+ if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
+ VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
+ n_New_in_cache, n_New_not_in_cache );
+ }
+
+ if (LIKELY(len < 2 * N_LINE_ARANGE)) {
+ zsm_set_range_SMALL( a, len, svNew );
+ } else {
+ Addr before_start = a;
+ Addr aligned_start = cacheline_ROUNDUP(a);
+ Addr after_start = cacheline_ROUNDDN(a + len);
+ UWord before_len = aligned_start - before_start;
+ UWord aligned_len = after_start - aligned_start;
+ UWord after_len = a + len - after_start;
+ tl_assert(before_start <= aligned_start);
+ tl_assert(aligned_start <= after_start);
+ tl_assert(before_len < N_LINE_ARANGE);
+ tl_assert(after_len < N_LINE_ARANGE);
+ tl_assert(get_cacheline_offset(aligned_start) == 0);
+ if (get_cacheline_offset(a) == 0) {
+ tl_assert(before_len == 0);
+ tl_assert(a == aligned_start);
+ }
+ if (get_cacheline_offset(a+len) == 0) {
+ tl_assert(after_len == 0);
+ tl_assert(after_start == a+len);
+ }
+ if (before_len > 0) {
+ zsm_set_range_SMALL( before_start, before_len, svNew );
+ }
+ if (after_len > 0) {
+ zsm_set_range_SMALL( after_start, after_len, svNew );
+ }
+ stats__cache_make_New_inZrep += (ULong)aligned_len;
+
+ while (1) {
+ Addr tag;
+ UWord wix;
+ if (aligned_start >= after_start)
+ break;
+ tl_assert(get_cacheline_offset(aligned_start) == 0);
+ tag = aligned_start & ~(N_LINE_ARANGE - 1);
+ wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
+ if (tag == cache_shmem.tags0[wix]) {
+ UWord i;
+ for (i = 0; i < N_LINE_ARANGE / 8; i++)
+ zsm_write64( aligned_start + i * 8, svNew );
+ } else {
+ UWord i;
+ Word zix;
+ SecMap* sm;
+ LineZ* lineZ;
+ /* This line is not in the cache. Do not force it in; instead
+ modify it in-place. */
+ /* find the Z line to write in and rcdec it or the
+ associated F line. */
+ find_Z_for_writing( &sm, &zix, tag );
+ tl_assert(sm);
+ tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
+ lineZ = &sm->linesZ[zix];
+ lineZ->dict[0] = svNew;
+ lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
+ for (i = 0; i < N_LINE_ARANGE/4; i++)
+ lineZ->ix2s[i] = 0; /* all refer to dict[0] */
+ rcinc_LineZ(lineZ);
+ }
+ aligned_start += N_LINE_ARANGE;
+ aligned_len -= N_LINE_ARANGE;
+ }
+ tl_assert(aligned_start == after_start);
+ tl_assert(aligned_len == 0);
+ }
+}
+
+
+/////////////////////////////////////////////////////////
+// //
+// Synchronisation objects //
+// //
+/////////////////////////////////////////////////////////
+
+// (UInt) `echo "Synchronisation object" | md5sum`
+#define SO_MAGIC 0x56b3c5b0U
+
+struct _SO {
+ VtsID viR; /* r-clock of sender */
+ VtsID viW; /* w-clock of sender */
+ UInt magic;
+};
+
+static SO* SO__Alloc ( void ) {
+ SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
+ so->viR = VtsID_INVALID;
+ so->viW = VtsID_INVALID;
+ so->magic = SO_MAGIC;
+ return so;
+}
+static void SO__Dealloc ( SO* so ) {
+ tl_assert(so);
+ tl_assert(so->magic == SO_MAGIC);
+ if (so->viR == VtsID_INVALID) {
+ tl_assert(so->viW == VtsID_INVALID);
+ } else {
+ tl_assert(so->viW != VtsID_INVALID);
+ VtsID__rcdec(so->viR);
+ VtsID__rcdec(so->viW);
+ }
+ so->magic = 0;
+ HG_(free)( so );
+}
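+
+/* Sketch of an SO's life cycle, as driven by the Top Level API
+   below: SO__Alloc leaves both clocks as VtsID_INVALID; the first
+   libhb_so_send installs (and rcinc's) the sender's clocks; later
+   sends either overwrite them (strong) or join with them (weak);
+   SO__Dealloc rcdec's whatever is installed.  Until the first send,
+   a receive on the SO hits the degenerate case in libhb_so_recv and
+   has no effect on the receiving thread. */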
+
+
+/////////////////////////////////////////////////////////
+// //
+// Top Level API //
+// //
+/////////////////////////////////////////////////////////
+
+static void show_thread_state ( HChar* str, Thr* t )
+{
+ if (1) return;
+ if (t->viR == t->viW) {
+ VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
+ VtsID__pp( t->viR );
+ VG_(printf)("%s","\n");
+ } else {
+ VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
+ VtsID__pp( t->viR );
+ VG_(printf)(" viW %u==", t->viW);
+ VtsID__pp( t->viW );
+ VG_(printf)("%s","\n");
+ }
+}
+
+
+Thr* libhb_init (
+ void (*get_stacktrace)( Thr*, Addr*, UWord ),
+ ExeContext* (*get_EC)( Thr* )
+ )
+{
+ Thr* thr;
+ VtsID vi;
+ tl_assert(get_stacktrace);
+ tl_assert(get_EC);
+ main_get_stacktrace = get_stacktrace;
+ main_get_EC = get_EC;
+
+ // No need to initialise hg_wordfm.
+ // No need to initialise hg_wordset.
+
+ vts_set_init();
+ vts_tab_init();
+ event_map_init();
+ VtsID__invalidate_caches();
+
+ // initialise shadow memory
+ zsm_init( SVal__rcinc, SVal__rcdec );
+
+ thr = Thr__new();
+ vi = VtsID__mk_Singleton( thr, 1 );
+ thr->viR = vi;
+ thr->viW = vi;
+ VtsID__rcinc(thr->viR);
+ VtsID__rcinc(thr->viW);
+
+ show_thread_state(" root", thr);
+ return thr;
+}
+
+Thr* libhb_create ( Thr* parent )
+{
+ /* The child's VTSs are copies of the parent's VTSs, but ticked at
+ the child's index. Since the child's index is guaranteed
+ unique, it has never been seen before, so the implicit value
+ before the tick is zero and after that is one. */
+ Thr* child = Thr__new();
+
+ child->viR = VtsID__tick( parent->viR, child );
+ child->viW = VtsID__tick( parent->viW, child );
+ VtsID__rcinc(child->viR);
+ VtsID__rcinc(child->viW);
+
+ tl_assert(VtsID__indexAt( child->viR, child ) == 1);
+ tl_assert(VtsID__indexAt( child->viW, child ) == 1);
+
+ /* and the parent has to move along too */
+ VtsID__rcdec(parent->viR);
+ VtsID__rcdec(parent->viW);
+ parent->viR = VtsID__tick( parent->viR, parent );
+ parent->viW = VtsID__tick( parent->viW, parent );
+ VtsID__rcinc(parent->viR);
+ VtsID__rcinc(parent->viW);
+
+ show_thread_state(" child", child);
+ show_thread_state("parent", parent);
+
+ return child;
+}
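+
+/* A worked example of the above, writing a VTS informally as a map
+   from thread to count (illustrative only; the real representation
+   is the interned VtsID form):
+
+     parent P has viR = viW = { P:3 }
+     libhb_create(P) producing child C:
+        C ends up with viR = viW = { P:3, C:1 }   (ticked at C)
+        P moves on to   viR = viW = { P:4 }       (ticked at P)
+
+   so everything P did before the create happens-before everything C
+   does, while P's subsequent accesses are unordered with respect to
+   C until some further synchronisation occurs. */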
+
+/* Shut down the library, and print stats (in fact that's _all_
+   this is for). */
+void libhb_shutdown ( Bool show_stats )
+{
+ if (show_stats) {
+ VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
+ VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
+ stats__secmaps_allocd,
+ stats__secmap_ga_space_covered);
+ VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
+ stats__secmap_linesZ_allocd,
+ stats__secmap_linesZ_bytes);
+ VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
+ stats__secmap_linesF_allocd,
+ stats__secmap_linesF_bytes);
+ VG_(printf)(" secmaps: %'10lu iterator steppings\n",
+ stats__secmap_iterator_steppings);
+ VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
+ stats__secmaps_search, stats__secmaps_search_slow);
+
+ VG_(printf)("%s","\n");
+ VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
+ stats__cache_totrefs, stats__cache_totmisses );
+ VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
+ stats__cache_Z_fetches, stats__cache_F_fetches );
+ VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
+ stats__cache_Z_wbacks, stats__cache_F_wbacks );
+ VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
+ stats__cache_invals, stats__cache_flushes );
+ VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
+ stats__cache_make_New_arange,
+ stats__cache_make_New_inZrep);
+
+ VG_(printf)("%s","\n");
+ VG_(printf)(" cline: %'10lu normalises\n",
+ stats__cline_normalises );
+ VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+ stats__cline_read64s,
+ stats__cline_read32s,
+ stats__cline_read16s,
+ stats__cline_read8s );
+ VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+ stats__cline_write64s,
+ stats__cline_write32s,
+ stats__cline_write16s,
+ stats__cline_write8s );
+ VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
+ stats__cline_set64s,
+ stats__cline_set32s,
+ stats__cline_set16s,
+ stats__cline_set8s );
+ VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
+ stats__cline_get8s, stats__cline_copy8s );
+ VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
+ stats__cline_64to32splits,
+ stats__cline_32to16splits,
+ stats__cline_16to8splits );
+ VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
+ stats__cline_64to32pulldown,
+ stats__cline_32to16pulldown,
+ stats__cline_16to8pulldown );
+ if (0)
+      VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
+ (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
+
+ VG_(printf)("%s","\n");
+
+ VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
+ stats__msm_read, stats__msm_read_change);
+ VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
+ stats__msm_write, stats__msm_write_change);
+ VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
+ stats__getOrdering_queries, stats__getOrdering_misses);
+ VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
+ stats__join2_queries, stats__join2_misses);
+
+ VG_(printf)("%s","\n");
+ VG_(printf)(
+ " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
+ VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
+ );
+ VG_(printf)( " libhb: %lu entries in vts_set\n",
+ VG_(sizeFM)( vts_set ) );
+
+ VG_(printf)("%s","\n");
+ VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
+ stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
+ stats__ctxt_rcdec2,
+ stats__ctxt_rcdec3 );
+ VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
+ stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
+ VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
+ (UWord)N_RCEC_TAB,
+ stats__ctxt_tab_curr );
+ VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
+ stats__ctxt_tab_qs,
+ stats__ctxt_tab_cmps );
+#if 0
+ VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
+ VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
+ VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
+ VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
+ VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
+ VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
+ VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
+ VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
+ VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
+ VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
+ VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
+ VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
+ VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
+ VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
+
+ VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
+ VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
+ VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
+ VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
+#endif
+
+ VG_(printf)("%s","<<< END libhb stats >>>\n");
+ VG_(printf)("%s","\n");
+
+ }
+}
+
+void libhb_async_exit ( Thr* thr )
+{
+ /* is there anything we need to do? */
+}
+
+/* Both Segs and SOs point to VTSs. However, there is no sharing, so
+ a Seg that points at a VTS is its one-and-only owner, and ditto for
+ a SO that points at a VTS. */
+
+SO* libhb_so_alloc ( void )
+{
+ return SO__Alloc();
+}
+
+void libhb_so_dealloc ( SO* so )
+{
+ tl_assert(so);
+ tl_assert(so->magic == SO_MAGIC);
+ SO__Dealloc(so);
+}
+
+/* See comments in libhb.h for details on the meaning of
+ strong vs weak sends and strong vs weak receives. */
+void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
+{
+ /* Copy the VTSs from 'thr' into the sync object, and then move
+ the thread along one step. */
+
+ tl_assert(so);
+ tl_assert(so->magic == SO_MAGIC);
+
+ /* stay sane .. a thread's read-clock must always lead or be the
+ same as its write-clock */
+ { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
+ tl_assert(ord == POrd_EQ || ord == POrd_LT);
+ }
+
+ /* since we're overwriting the VtsIDs in the SO, we need to drop
+ any references made by the previous contents thereof */
+ if (so->viR == VtsID_INVALID) {
+ tl_assert(so->viW == VtsID_INVALID);
+ so->viR = thr->viR;
+ so->viW = thr->viW;
+ VtsID__rcinc(so->viR);
+ VtsID__rcinc(so->viW);
+ } else {
+ /* In a strong send, we dump any previous VC in the SO and
+ install the sending thread's VC instead. For a weak send we
+ must join2 with what's already there. */
+ tl_assert(so->viW != VtsID_INVALID);
+ VtsID__rcdec(so->viR);
+ VtsID__rcdec(so->viW);
+ so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
+ so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
+ VtsID__rcinc(so->viR);
+ VtsID__rcinc(so->viW);
+ }
+
+ /* move both parent clocks along */
+ VtsID__rcdec(thr->viR);
+ VtsID__rcdec(thr->viW);
+ thr->viR = VtsID__tick( thr->viR, thr );
+ thr->viW = VtsID__tick( thr->viW, thr );
+ VtsID__rcinc(thr->viR);
+ VtsID__rcinc(thr->viW);
+ if (strong_send)
+ show_thread_state("s-send", thr);
+ else
+ show_thread_state("w-send", thr);
+}
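+
+/* Worked example (same informal notation as above, showing only the
+   write-clocks, and taking join2 to be the per-thread maximum):
+   thread T with viW = { T:5, U:2 } sends on an SO already holding
+   viW = { U:4 }:
+
+     strong send:  so->viW becomes { T:5, U:2 }   (overwrite)
+     weak send:    so->viW becomes { T:5, U:4 }   (join)
+
+   and in either case T then ticks along to viW = { T:6, U:2 }. */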
+
+void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
+{
+ tl_assert(so);
+ tl_assert(so->magic == SO_MAGIC);
+
+ if (so->viR != VtsID_INVALID) {
+ tl_assert(so->viW != VtsID_INVALID);
+
+ /* Weak receive (basically, an R-acquisition of a R-W lock).
+ This advances the read-clock of the receiver, but not the
+ write-clock. */
+ VtsID__rcdec(thr->viR);
+ thr->viR = VtsID__join2( thr->viR, so->viR );
+ VtsID__rcinc(thr->viR);
+
+ /* For a strong receive, we also advance the receiver's write
+ clock, which means the receive as a whole is essentially
+ equivalent to a W-acquisition of a R-W lock. */
+ if (strong_recv) {
+ VtsID__rcdec(thr->viW);
+ thr->viW = VtsID__join2( thr->viW, so->viW );
+ VtsID__rcinc(thr->viW);
+ }
+
+ if (strong_recv)
+ show_thread_state("s-recv", thr);
+ else
+ show_thread_state("w-recv", thr);
+
+ } else {
+ tl_assert(so->viW == VtsID_INVALID);
+ /* Deal with degenerate case: 'so' has no vts, so there has been
+ no message posted to it. Just ignore this case. */
+ show_thread_state("d-recv", thr);
+ }
+}
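+
+/* Continuing the example: a receiver R with viR = { R:7 } receiving
+   from an SO whose clocks are { T:5, U:2 }:
+
+     weak recv:    R's viR becomes { T:5, U:2, R:7 }; viW unchanged
+     strong recv:  R's viW is joined in the same way as well
+
+   which is why, as the comments above say, a strong receive behaves
+   like a W-acquisition and a weak receive like an R-acquisition of a
+   reader-writer lock.  (Join again taken as per-thread maximum.) */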
+
+Bool libhb_so_everSent ( SO* so )
+{
+ if (so->viR == VtsID_INVALID) {
+ tl_assert(so->viW == VtsID_INVALID);
+ return False;
+ } else {
+ tl_assert(so->viW != VtsID_INVALID);
+ return True;
+ }
+}
+
+#define XXX1 0 // 0x67a106c
+#define XXX2 0
+
+static Bool TRACEME(Addr a, SizeT szB) {
+ if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
+ if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
+ return False;
+}
+static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
+ SVal sv = zsm_read8(a);
+ VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
+ show_thread_state("", thr);
+ VG_(printf)("%s","\n");
+}
+
+void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
+{
+ SVal sv = SVal__mkC(thr->viW, thr->viW);
+ tl_assert(is_sane_SVal_C(sv));
+ if(TRACEME(a,szB))trace(thr,a,szB,"nw-before");
+ zsm_set_range( a, szB, sv );
+ if(TRACEME(a,szB))trace(thr,a,szB,"nw-after ");
+}
+
+void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
+{
+ if(TRACEME(a,szB))trace(thr,a,szB,"NA-before");
+ zsm_set_range( a, szB, SVal__mkA() );
+ if(TRACEME(a,szB))trace(thr,a,szB,"NA-after ");
+}
+
+void* libhb_get_Thr_opaque ( Thr* thr ) {
+ tl_assert(thr);
+ return thr->opaque;
+}
+
+void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
+ tl_assert(thr);
+ thr->opaque = v;
+}
+
+void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
+{
+ zsm_copy_range(dst, src, len);
+}
+
+void libhb_maybe_GC ( void )
+{
+ event_map_maybe_GC();
+ /* If there are still freelist entries available, no need for a
+ GC. */
+ if (vts_tab_freelist != VtsID_INVALID)
+ return;
+   /* So all the table entries are full, and we're having to expand
+      the table. But have we hit the threshold point yet? */
+ if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
+ return;
+ vts_tab__do_GC( False/*don't show stats*/ );
+}
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION END main library //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/*--------------------------------------------------------------------*/
+/*--- end libhb_main.c ---*/
+/*--------------------------------------------------------------------*/