Diffstat (limited to 'helgrind/.svn')
19 files changed, 14177 insertions, 0 deletions
diff --git a/helgrind/.svn/dir-prop-base b/helgrind/.svn/dir-prop-base new file mode 100644 index 0000000..da4a263 --- /dev/null +++ b/helgrind/.svn/dir-prop-base @@ -0,0 +1,16 @@ +K 10 +svn:ignore +V 243 +.deps +helgrind-amd64-linux +helgrind-ppc32-linux +helgrind-ppc64-linux +helgrind-x86-linux +Makefile +Makefile.in +vgpreload_helgrind-amd64-linux.so +vgpreload_helgrind-ppc32-linux.so +vgpreload_helgrind-ppc64-linux.so +vgpreload_helgrind-x86-linux.so + +END diff --git a/helgrind/.svn/entries b/helgrind/.svn/entries new file mode 100644 index 0000000..3b4259a --- /dev/null +++ b/helgrind/.svn/entries @@ -0,0 +1,226 @@ +8 + +dir +9703 +svn://svn.valgrind.org/valgrind/trunk/helgrind +svn://svn.valgrind.org/valgrind + + + +2009-03-15T23:25:38.213170Z +9416 +njn +has-props + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +a5019735-40e9-0310-863c-91ae7b9d1cf9 + +hg_intercepts.c +file + + + + +2009-03-13T17:29:59.000000Z +4c0347b80766f4bc77bf3c07f6732f71 +2009-03-10T22:02:09.669944Z +9344 +njn + +hg_errors.h +file + + + + +2009-03-13T17:29:59.000000Z +49cfb9c69c123d0d7fd0b5e3d1da8e1f +2009-03-10T22:02:09.669944Z +9344 +njn + +helgrind.h +file + + + + +2009-03-13T17:29:59.000000Z +4d5f0424f46fabd606a69c95cbb32d70 +2009-03-10T22:02:09.669944Z +9344 +njn + +hg_main.c +file + + + + +2009-04-30T16:43:55.000000Z +ebb2d4f778cb646795ef7153502766fc +2009-03-15T23:25:38.213170Z +9416 +njn + +hg_wordset.c +file + + + + +2009-03-13T17:29:59.000000Z +d8a429ba83845e51ffc57b37348b7ddb +2009-03-10T22:02:09.669944Z +9344 +njn + +docs +dir + +README_YARD.txt +file + + + + +2009-03-13T17:29:59.000000Z +bb34f6de399819578286f5f3077ae81f +2008-10-25T16:22:41.648611Z +8707 +sewardj + +hg_lock_n_thread.c +file + + + + +2009-03-13T17:29:59.000000Z +8d7af180e014bb6118d169588f603fb3 +2009-03-10T22:02:09.669944Z +9344 +njn + +hg_wordset.h +file + + + + +2009-03-13T17:29:59.000000Z +4b9896a3623d4bb410438d7efb61856c +2009-03-10T22:02:09.669944Z +9344 +njn + +hg_lock_n_thread.h +file + + + + +2009-03-13T17:29:59.000000Z +cf621660dfe32f08290225cc4d001aae +2009-03-10T22:02:09.669944Z +9344 +njn + +tests +dir + +hg_basics.c +file + + + + +2009-03-13T17:29:59.000000Z +8c29c10deb3e538eebc7b83d902b4133 +2009-03-10T22:02:09.669944Z +9344 +njn + +libhb.h +file + + + + +2009-03-13T17:29:59.000000Z +68278834bfa089717072ddb0b07df10e +2009-03-10T22:02:09.669944Z +9344 +njn + +hg_basics.h +file + + + + +2009-03-13T17:30:00.000000Z +5d981ad3e0ef05e01ee0e6b88f4e8f9f +2009-03-10T22:02:09.669944Z +9344 +njn + +libhb_core.c +file + + + + +2009-04-30T16:43:55.000000Z +3d64543b3dc34ab4e62405b369bba155 +2009-03-15T23:25:38.213170Z +9416 +njn + +Makefile.am +file + + + + +2009-03-13T17:30:00.000000Z +5386f6c46810c3f9a826fa3277e073f6 +2009-01-22T21:56:32.234907Z +9031 +njn + +README_MSMProp2.txt +file + + + + +2009-03-13T17:30:00.000000Z +967ec9869d6c3f23157f4dfc405b76f1 +2008-10-25T16:22:41.648611Z +8707 +sewardj + +hg_errors.c +file + + + + +2009-03-13T17:30:00.000000Z +bd6ffc9b7c116a65d51bece430ae2f7a +2009-03-10T22:02:09.669944Z +9344 +njn + diff --git a/helgrind/.svn/format b/helgrind/.svn/format new file mode 100644 index 0000000..45a4fb7 --- /dev/null +++ b/helgrind/.svn/format @@ -0,0 +1 @@ +8 diff --git a/helgrind/.svn/text-base/Makefile.am.svn-base b/helgrind/.svn/text-base/Makefile.am.svn-base new file mode 100644 index 0000000..02f96ba --- /dev/null +++ b/helgrind/.svn/text-base/Makefile.am.svn-base @@ -0,0 +1,127 @@ +include $(top_srcdir)/Makefile.tool.am + +noinst_PROGRAMS = +if VGCONF_PLATFORMS_INCLUDE_X86_LINUX 
+noinst_PROGRAMS += helgrind-x86-linux vgpreload_helgrind-x86-linux.so +endif +if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX +noinst_PROGRAMS += helgrind-amd64-linux vgpreload_helgrind-amd64-linux.so +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX +noinst_PROGRAMS += helgrind-ppc32-linux vgpreload_helgrind-ppc32-linux.so +endif +if VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX +noinst_PROGRAMS += helgrind-ppc64-linux vgpreload_helgrind-ppc64-linux.so +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5 +noinst_PROGRAMS += helgrind-ppc32-aix5 vgpreload_helgrind-ppc32-aix5.so +endif +if VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5 +noinst_PROGRAMS += helgrind-ppc64-aix5 vgpreload_helgrind-ppc64-aix5.so +endif + +VGPRELOAD_HELGRIND_SOURCES_COMMON = hg_intercepts.c + +vgpreload_helgrind_x86_linux_so_SOURCES = $(VGPRELOAD_HELGRIND_SOURCES_COMMON) +vgpreload_helgrind_x86_linux_so_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX) +vgpreload_helgrind_x86_linux_so_CFLAGS = $(AM_CFLAGS_X86_LINUX) $(AM_CFLAGS_PIC) +vgpreload_helgrind_x86_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_X86_LINUX) +vgpreload_helgrind_x86_linux_so_LDFLAGS = \ + $(PRELOAD_LDFLAGS_X86_LINUX) \ + $(LIBREPLACEMALLOC_LDFLAGS_X86_LINUX) + +vgpreload_helgrind_amd64_linux_so_SOURCES = $(VGPRELOAD_HELGRIND_SOURCES_COMMON) +vgpreload_helgrind_amd64_linux_so_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX) +vgpreload_helgrind_amd64_linux_so_CFLAGS = $(AM_CFLAGS_AMD64_LINUX) $(AM_CFLAGS_PIC) +vgpreload_helgrind_amd64_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_AMD64_LINUX) +vgpreload_helgrind_amd64_linux_so_LDFLAGS = \ + $(PRELOAD_LDFLAGS_AMD64_LINUX) \ + $(LIBREPLACEMALLOC_LDFLAGS_AMD64_LINUX) + +vgpreload_helgrind_ppc32_linux_so_SOURCES = $(VGPRELOAD_HELGRIND_SOURCES_COMMON) +vgpreload_helgrind_ppc32_linux_so_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX) +vgpreload_helgrind_ppc32_linux_so_CFLAGS = $(AM_CFLAGS_PPC32_LINUX) $(AM_CFLAGS_PIC) +vgpreload_helgrind_ppc32_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC32_LINUX) +vgpreload_helgrind_ppc32_linux_so_LDFLAGS = \ + $(PRELOAD_LDFLAGS_PPC32_LINUX) \ + $(LIBREPLACEMALLOC_LDFLAGS_PPC32_LINUX) + +vgpreload_helgrind_ppc64_linux_so_SOURCES = $(VGPRELOAD_HELGRIND_SOURCES_COMMON) +vgpreload_helgrind_ppc64_linux_so_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX) +vgpreload_helgrind_ppc64_linux_so_CFLAGS = $(AM_CFLAGS_PPC64_LINUX) $(AM_CFLAGS_PIC) +vgpreload_helgrind_ppc64_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC64_LINUX) +vgpreload_helgrind_ppc64_linux_so_LDFLAGS = \ + $(PRELOAD_LDFLAGS_PPC64_LINUX) \ + $(LIBREPLACEMALLOC_LDFLAGS_PPC64_LINUX) + +vgpreload_helgrind_ppc32_aix5_so_SOURCES = $(VGPRELOAD_HELGRIND_SOURCES_COMMON) +vgpreload_helgrind_ppc32_aix5_so_CPPFLAGS = $(AM_CPPFLAGS_PPC32_AIX5) +vgpreload_helgrind_ppc32_aix5_so_CFLAGS = $(AM_CFLAGS_PPC32_AIX5) $(AM_CFLAGS_PIC) +vgpreload_helgrind_ppc32_aix5_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC32_AIX5) +vgpreload_helgrind_ppc32_aix5_so_LDFLAGS = \ + $(PRELOAD_LDFLAGS_PPC32_AIX5) \ + $(LIBREPLACEMALLOC_LDFLAGS_PPC32_AIX5) + +vgpreload_helgrind_ppc64_aix5_so_SOURCES = $(VGPRELOAD_HELGRIND_SOURCES_COMMON) +vgpreload_helgrind_ppc64_aix5_so_CPPFLAGS = $(AM_CPPFLAGS_PPC64_AIX5) +vgpreload_helgrind_ppc64_aix5_so_CFLAGS = $(AM_CFLAGS_PPC64_AIX5) $(AM_CFLAGS_PIC) +vgpreload_helgrind_ppc64_aix5_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC64_AIX5) +vgpreload_helgrind_ppc64_aix5_so_LDFLAGS = \ + $(PRELOAD_LDFLAGS_PPC64_AIX5) \ + $(LIBREPLACEMALLOC_LDFLAGS_PPC64_AIX5) + +HELGRIND_SOURCES_COMMON = \ + hg_basics.c hg_lock_n_thread.c hg_wordset.c libhb_core.c \ + hg_errors.c hg_main.c + +helgrind_x86_linux_SOURCES = 
$(HELGRIND_SOURCES_COMMON) +helgrind_x86_linux_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX) +helgrind_x86_linux_CFLAGS = $(AM_CFLAGS_X86_LINUX) -O2 +helgrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX) +helgrind_x86_linux_LDADD = $(TOOL_LDADD_X86_LINUX) +helgrind_x86_linux_LDFLAGS = $(TOOL_LDFLAGS_X86_LINUX) + +helgrind_amd64_linux_SOURCES = $(HELGRIND_SOURCES_COMMON) +helgrind_amd64_linux_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX) +helgrind_amd64_linux_CFLAGS = $(AM_CFLAGS_AMD64_LINUX) -O2 +helgrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX) +helgrind_amd64_linux_LDADD = $(TOOL_LDADD_AMD64_LINUX) +helgrind_amd64_linux_LDFLAGS = $(TOOL_LDFLAGS_AMD64_LINUX) + +helgrind_ppc32_linux_SOURCES = $(HELGRIND_SOURCES_COMMON) +helgrind_ppc32_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX) +helgrind_ppc32_linux_CFLAGS = $(AM_CFLAGS_PPC32_LINUX) -O2 +helgrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX) +helgrind_ppc32_linux_LDADD = $(TOOL_LDADD_PPC32_LINUX) +helgrind_ppc32_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC32_LINUX) + +helgrind_ppc64_linux_SOURCES = $(HELGRIND_SOURCES_COMMON) +helgrind_ppc64_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX) +helgrind_ppc64_linux_CFLAGS = $(AM_CFLAGS_PPC64_LINUX) -O2 +helgrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX) +helgrind_ppc64_linux_LDADD = $(TOOL_LDADD_PPC64_LINUX) +helgrind_ppc64_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC64_LINUX) + +helgrind_ppc32_aix5_SOURCES = $(HELGRIND_SOURCES_COMMON) +helgrind_ppc32_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC32_AIX5) +helgrind_ppc32_aix5_CFLAGS = $(AM_CFLAGS_PPC32_AIX5) -O2 +helgrind_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5) +helgrind_ppc32_aix5_LDADD = $(TOOL_LDADD_PPC32_AIX5) +helgrind_ppc32_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC32_AIX5) + +helgrind_ppc64_aix5_SOURCES = $(HELGRIND_SOURCES_COMMON) +helgrind_ppc64_aix5_CPPFLAGS = $(AM_CPPFLAGS_PPC64_AIX5) +helgrind_ppc64_aix5_CFLAGS = $(AM_CFLAGS_PPC64_AIX5) -O2 +helgrind_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5) +helgrind_ppc64_aix5_LDADD = $(TOOL_LDADD_PPC64_AIX5) +helgrind_ppc64_aix5_LDFLAGS = $(TOOL_LDFLAGS_PPC64_AIX5) + +hgincludedir = $(includedir)/valgrind + +hginclude_HEADERS = helgrind.h + +noinst_HEADERS = \ + hg_basics.h hg_lock_n_thread.h hg_errors.h hg_wordset.h \ + libhb.h + +EXTRA_DIST = README_MSMProp2.txt README_YARD.txt diff --git a/helgrind/.svn/text-base/README_MSMProp2.txt.svn-base b/helgrind/.svn/text-base/README_MSMProp2.txt.svn-base new file mode 100644 index 0000000..6b4ac5f --- /dev/null +++ b/helgrind/.svn/text-base/README_MSMProp2.txt.svn-base @@ -0,0 +1,156 @@ + +MSMProp2, a simplified but functionally equivalent version of MSMProp1 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Julian Seward, OpenWorks Ltd, 19 August 2008 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Note that this file does NOT describe the state machine used in the +svn://svn.valgrind.org/branches/YARD version of Helgrind. That state +machine is different again from any previously described machine. + +See the file README_YARD.txt for more details on YARD. + + ---------------------- + +In early 2008 Konstantin Serebryany proposed "MSMProp1", a memory +state machine for data race detection. It is described at +http://code.google.com/p/data-race-test/wiki/MSMProp1 + +Implementation experiences show MSMProp1 is useful, but difficult to +implement efficiently. In particular keeping the memory usage under +control is complex and difficult. 
+ +This note points out a key simplification of MSMProp1, which makes it +easier to implement without changing the functionality. + + +The idea +~~~~~~~~ + +The core of the idea pertains to the "Condition" entry for MSMProp1 +state machine rules E5 and E6(r). These are, respectively: + + HB(SS, currS) and its negation + ! HB(SS, currS). + +Here, SS is a set of segments, and currS is a single segment. Each +segment contains a vector timestamp. The expression "HB(SS, currS)" +is intended to denote + + for each segment S in SS . happens_before(S,currS) + +where happens_before(S,T) means that S's vector timestamp is ordered +before-or-equal to T's vector timestamp. + +In words, the expression + + for each segment S in SS . happens_before(S,currS) + +is equivalent to saying that currS has a timestamp which is +greater-than-equal to the timestamps of all the segments in SS. + +The key observation is that this is equivalent to + + happens_before( JOIN(SS), currS ) + +where JOIN is the lattice-theoretic "max" or "least upper bound" +operation on vector clocks. Given the definition of HB, +happens_before and (binary) JOIN, this is easy to prove. + + +The consequences +~~~~~~~~~~~~~~~~ + +With that observation in place, it is a short step to observe that +storing segment sets in MSMProp1 is unnecessary. Instead of +storing a segment set in each shadow value, just store and +update a single vector timestamp. The following two equivalences +hold: + + MSMProp1 MSMProp2 + + adding a segment S join-ing S's vector timestamp + to the segment-set to the current vector timestamp + + HB(SS,currS) happens_before( + currS's timestamp, + current vector timestamp ) + +Once it is no longer necessary to represent segment sets, it then +also becomes unnecessary to represent segments. This constitutes +a significant simplication to the implementation. + + +The resulting state machine, MSMProp2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +MSMProp2 is isomorphic to MSMProp1, with the following changes: + +States are New, Read(VTS,LS), Write(VTS,LS) + +where LS is a lockset (as before) and VTS is a vector timestamp. + +For a thread T with current lockset 'currLS' and current VTS 'currVTS' +making a memory access, the new rules are + +Name Old-State Op Guard New-State Race-If + +E1 New rd True Read(currVTS,currLS) False + +E2 New wr True Write(currVTS,currLS) False + +E3 Read(oldVTS,oldLS) rd True Read(newVTS,newLS) False + +E4 Read(oldVTS,oldLS) wr True Write(newVTS,newLS) #newLS == 0 + && !hb(oldVTS,currVTS) + +E5 Write(oldVTS,oldLS) rd hb(oldVTS, Read(currVTS,currLS) False + currVTS) + +E6r Write(oldVTS,oldLS) rd !hb(oldVTS, Write(newVTS,newLS) #newLS == 0 + currVTS) && !hb(oldVTS,currVTS) + +E6w Write(oldVTS,oldLS) wr True Write(newVTS,newLS) #newLS == 0 + && !hb(oldVTS,currVTS) + + where newVTS = join2(oldVTS,currVTS) + + newLS = if hb(oldVTS,currVTS) + then currLS + else intersect(oldLS,currLS) + + hb(vts1, vts2) = vts1 happens before or is equal to vts2 + + +Interpretation of the states +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +I always found the state names in MSMProp1 confusing. Both MSMProp1 +and MSMProp2 are easier to understand if the states Read and Write are +renamed, like this: + + old name new name + + Read WriteConstraint + Write AllConstraint + +The effect of a state Read(VTS,LS) is to constrain all later-observed +writes so that either (1) the writing thread holds at least one lock +in common with LS, or (2) those writes must happen-after VTS. If +neither of those two conditions hold, a race is reported. 
+ +Hence a Read state places a constraint on writes. + +The effect of a state Write(VTS,LS) is similar, but it applies to all +later-observed accesses: either (1) the accessing thread holds at +least one lock in common with LS, or (2) those accesses must +happen-after VTS. If neither of those two conditions hold, a race is +reported. + +Hence a Write state places a constraint on all accesses. + +If we ignore the LS component of these states, the intuitive +interpretation of the VTS component is that it states the earliest +vector-time that the next write / access may safely happen. + diff --git a/helgrind/.svn/text-base/README_YARD.txt.svn-base b/helgrind/.svn/text-base/README_YARD.txt.svn-base new file mode 100644 index 0000000..992769c --- /dev/null +++ b/helgrind/.svn/text-base/README_YARD.txt.svn-base @@ -0,0 +1,34 @@ + +YARD, Yet Another Race Detector, built on the Helgrind framework +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Julian Seward, OpenWorks Ltd, 19 August 2008 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The YARD race detector lives in svn://svn.valgrind.org/branches/YARD. + +It uses a new and relatively simple race detection engine, based on +the idea of shadowing each memory location with two vector timestamps, +indicating respectively the "earliest safe read point" and "earliest +safe write point". As far as I know this is a novel approach. Some +features of the implementation: + +* Modularity. The entire race detection engine is placed in a + standalone library (libhb_core.c) with a simple interface (libhb.h). + This makes it easier to debug and verify the engine; indeed it can + be built as a standalone executable with test harness using "make -f + Makefile_sa". + +* Simplified and scalable storage management, so that large programs, + with many synchronisation events, can be handled. + +* Ability to report both call stacks involved in a race, without + excessive time or space overhead. + +* Pure happens before operation, so as not to give any false + positives. + +To use, build as usual and run as "--tool=helgrind". + +You can disable lock order checking with --track-lockorders=no, as it +sometimes produces an annoying amount of output. diff --git a/helgrind/.svn/text-base/helgrind.h.svn-base b/helgrind/.svn/text-base/helgrind.h.svn-base new file mode 100644 index 0000000..7696e7d --- /dev/null +++ b/helgrind/.svn/text-base/helgrind.h.svn-base @@ -0,0 +1,113 @@ +/* + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (helgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- + + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks LLP + info@open-works.co.uk + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. 
+ + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (helgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + +#ifndef __HELGRIND_H +#define __HELGRIND_H + +#include "valgrind.h" + +typedef + enum { + VG_USERREQ__HG_CLEAN_MEMORY = VG_USERREQ_TOOL_BASE('H','G'), + + /* The rest are for Helgrind's internal use. Not for end-user + use. Do not use them unless you are a Valgrind developer. */ + + /* Notify the tool what this thread's pthread_t is. */ + _VG_USERREQ__HG_SET_MY_PTHREAD_T = VG_USERREQ_TOOL_BASE('H','G') + + 256, + _VG_USERREQ__HG_PTH_API_ERROR, /* char*, int */ + _VG_USERREQ__HG_PTHREAD_JOIN_POST, /* pthread_t of quitter */ + _VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST, /* pth_mx_t*, long mbRec */ + _VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE, /* pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, /* pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_POST, /* pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, /* pth_mx_t*, long isTryLock */ + _VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, /* pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_COND_SIGNAL_PRE, /* pth_cond_t* */ + _VG_USERREQ__HG_PTHREAD_COND_BROADCAST_PRE, /* pth_cond_t* */ + _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE, /* pth_cond_t*, pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_COND_WAIT_POST, /* pth_cond_t*, pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE, /* pth_cond_t* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_INIT_POST, /* pth_rwlk_t* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_DESTROY_PRE, /* pth_rwlk_t* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, /* pth_rwlk_t*, long isW */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, /* pth_rwlk_t*, long isW */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_PRE, /* pth_rwlk_t* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_POST, /* pth_rwlk_t* */ + _VG_USERREQ__HG_POSIX_SEM_INIT_POST, /* sem_t*, ulong value */ + _VG_USERREQ__HG_POSIX_SEM_DESTROY_PRE, /* sem_t* */ + _VG_USERREQ__HG_POSIX_SEM_POST_PRE, /* sem_t* */ + _VG_USERREQ__HG_POSIX_SEM_WAIT_POST, /* sem_t* */ + _VG_USERREQ__HG_PTHREAD_BARRIER_INIT_PRE, /* pth_bar_t*, ulong */ + _VG_USERREQ__HG_PTHREAD_BARRIER_WAIT_PRE, /* pth_bar_t* */ + _VG_USERREQ__HG_PTHREAD_BARRIER_DESTROY_PRE /* pth_bar_t* */ + } Vg_TCheckClientRequest; + +/* Clean memory state. 
This makes Helgrind forget everything it knew + about the specified memory range, and resets it to New. This is + particularly useful for memory allocators that wish to recycle + memory. */ +#define VALGRIND_HG_CLEAN_MEMORY(_qzz_start, _qzz_len) \ + do { \ + unsigned long _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__HG_CLEAN_MEMORY, \ + _qzz_start, _qzz_len, 0, 0, 0); \ + (void)0; \ + } while(0) + +#endif /* __HELGRIND_H */ diff --git a/helgrind/.svn/text-base/hg_basics.c.svn-base b/helgrind/.svn/text-base/hg_basics.c.svn-base new file mode 100644 index 0000000..d7f3fa7 --- /dev/null +++ b/helgrind/.svn/text-base/hg_basics.c.svn-base @@ -0,0 +1,86 @@ + +/*--------------------------------------------------------------------*/ +/*--- Basic definitions for all of Helgrind. ---*/ +/*--- hg_basics.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "pub_tool_basics.h" +#include "pub_tool_libcbase.h" +#include "pub_tool_libcassert.h" +#include "pub_tool_mallocfree.h" +#include "pub_tool_threadstate.h" + +#include "hg_basics.h" /* self */ + + +/*----------------------------------------------------------------*/ +/*--- Very basic stuff ---*/ +/*----------------------------------------------------------------*/ + +void* HG_(zalloc) ( HChar* cc, SizeT n ) +{ + void* p; + tl_assert(n > 0); + p = VG_(malloc)( cc, n ); + tl_assert(p); + VG_(memset)(p, 0, n); + return p; +} + +void HG_(free) ( void* p ) +{ + tl_assert(p); + VG_(free)(p); +} + +Char* HG_(strdup) ( HChar* cc, const Char* s ) +{ + return VG_(strdup)( cc, s ); +} + + +/*----------------------------------------------------------------*/ +/*--- Command line options ---*/ +/*----------------------------------------------------------------*/ + +/* Description of these flags is in hg_basics.h. */ + +Bool HG_(clo_track_lockorders) = True; + +Bool HG_(clo_cmp_race_err_addrs) = False; + +Bool HG_(clo_show_conflicts) = True; + +UWord HG_(clo_conflict_cache_size) = 1000000; + +Word HG_(clo_sanity_flags) = 0; + + +/*--------------------------------------------------------------------*/ +/*--- end hg_basics.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_basics.h.svn-base b/helgrind/.svn/text-base/hg_basics.h.svn-base new file mode 100644 index 0000000..4923c0e --- /dev/null +++ b/helgrind/.svn/text-base/hg_basics.h.svn-base @@ -0,0 +1,97 @@ + +/*--------------------------------------------------------------------*/ +/*--- Basic definitions for all of Helgrind. 
---*/ +/*--- hg_basics.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __HG_BASICS_H +#define __HG_BASICS_H + + +/*----------------------------------------------------------------*/ +/*--- Very basic stuff ---*/ +/*----------------------------------------------------------------*/ + +#define HG_(str) VGAPPEND(vgHelgrind_,str) + +void* HG_(zalloc) ( HChar* cc, SizeT n ); +void HG_(free) ( void* p ); +Char* HG_(strdup) ( HChar* cc, const Char* s ); + +static inline Bool HG_(is_sane_ThreadId) ( ThreadId coretid ) { + return coretid >= 0 && coretid < VG_N_THREADS; +} + + +/*----------------------------------------------------------------*/ +/*--- Command line options ---*/ +/*----------------------------------------------------------------*/ + +/* Flags for controlling for which events sanity checking is done */ +#define SCE_THREADS (1<<0) // Sanity check at thread create/join +#define SCE_LOCKS (1<<1) // Sanity check at lock events +#define SCE_BIGRANGE (1<<2) // Sanity check at big mem range events +#define SCE_ACCESS (1<<3) // Sanity check at mem accesses +#define SCE_LAOG (1<<4) // Sanity check at significant LAOG events + +#define SCE_BIGRANGE_T 256 // big mem range minimum size + + +/* Enable/disable lock order checking. Sometimes it produces a lot of + errors, possibly genuine, which nevertheless can be very + annoying. */ +extern Bool HG_(clo_track_lockorders); + +/* When comparing race errors for equality, should the race address be + taken into account? For users, no, but for verification purposes + (regtesting) this is sometimes important. */ +extern Bool HG_(clo_cmp_race_err_addrs); + +/* Show conflicting accesses? This involves collecting and storing + large numbers of call stacks just in case we might need to show + them later, and so is expensive (although very useful). Hence + allow it to be optionally disabled. */ +extern Bool HG_(clo_show_conflicts); + +/* Size of the conflicting-access cache, measured in terms of + maximum possible number of elements in the previous-access map. + Must be between 10k amd 10 million. Default is 1 million. */ +extern UWord HG_(clo_conflict_cache_size); + +/* Sanity check level. This is an or-ing of + SCE_{THREADS,LOCKS,BIGRANGE,ACCESS,LAOG}. */ +extern Word HG_(clo_sanity_flags); + + + + +#endif /* ! 
__HG_BASICS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end hg_basics.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_errors.c.svn-base b/helgrind/.svn/text-base/hg_errors.c.svn-base new file mode 100644 index 0000000..fbb499b --- /dev/null +++ b/helgrind/.svn/text-base/hg_errors.c.svn-base @@ -0,0 +1,813 @@ + +/*--------------------------------------------------------------------*/ +/*--- Error management for Helgrind. ---*/ +/*--- hg_errors.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "pub_tool_basics.h" +#include "pub_tool_libcbase.h" +#include "pub_tool_libcassert.h" +#include "pub_tool_libcprint.h" +#include "pub_tool_execontext.h" +#include "pub_tool_errormgr.h" +#include "pub_tool_wordfm.h" +#include "pub_tool_xarray.h" +#include "pub_tool_debuginfo.h" +#include "pub_tool_threadstate.h" + +#include "hg_basics.h" +#include "hg_wordset.h" +#include "hg_lock_n_thread.h" +#include "libhb.h" +#include "hg_errors.h" /* self */ + + +/*----------------------------------------------------------------*/ +/*--- ---*/ +/*----------------------------------------------------------------*/ + +/* This has to do with printing error messages. See comments on + announce_threadset() and summarise_threadset(). Perhaps it + should be a command line option. */ +#define N_THREADS_TO_ANNOUNCE 5 + + +/*----------------------------------------------------------------*/ +/*--- Error management ---*/ +/*----------------------------------------------------------------*/ + +/* maps (by value) strings to a copy of them in ARENA_TOOL */ + +static WordFM* string_table = NULL; + +ULong HG_(stats__string_table_queries) = 0; + +ULong HG_(stats__string_table_get_map_size) ( void ) { + return string_table ? 
(ULong)VG_(sizeFM)(string_table) : 0; +} + +static Word string_table_cmp ( UWord s1, UWord s2 ) { + return (Word)VG_(strcmp)( (HChar*)s1, (HChar*)s2 ); +} + +static HChar* string_table_strdup ( HChar* str ) { + HChar* copy = NULL; + HG_(stats__string_table_queries)++; + if (!str) + str = "(null)"; + if (!string_table) { + string_table = VG_(newFM)( HG_(zalloc), "hg.sts.1", + HG_(free), string_table_cmp ); + tl_assert(string_table); + } + if (VG_(lookupFM)( string_table, + NULL, (Word*)©, (Word)str )) { + tl_assert(copy); + if (0) VG_(printf)("string_table_strdup: %p -> %p\n", str, copy ); + return copy; + } else { + copy = HG_(strdup)("hg.sts.2", str); + tl_assert(copy); + VG_(addToFM)( string_table, (Word)copy, (Word)copy ); + return copy; + } +} + +/* maps from Lock .unique fields to LockP*s */ + +static WordFM* map_LockN_to_P = NULL; + +ULong HG_(stats__LockN_to_P_queries) = 0; + +ULong HG_(stats__LockN_to_P_get_map_size) ( void ) { + return map_LockN_to_P ? (ULong)VG_(sizeFM)(map_LockN_to_P) : 0; +} + +static Word lock_unique_cmp ( UWord lk1W, UWord lk2W ) +{ + Lock* lk1 = (Lock*)lk1W; + Lock* lk2 = (Lock*)lk2W; + tl_assert( HG_(is_sane_LockNorP)(lk1) ); + tl_assert( HG_(is_sane_LockNorP)(lk2) ); + if (lk1->unique < lk2->unique) return -1; + if (lk1->unique > lk2->unique) return 1; + return 0; +} + +static Lock* mk_LockP_from_LockN ( Lock* lkn ) +{ + Lock* lkp = NULL; + HG_(stats__LockN_to_P_queries)++; + tl_assert( HG_(is_sane_LockN)(lkn) ); + if (!map_LockN_to_P) { + map_LockN_to_P = VG_(newFM)( HG_(zalloc), "hg.mLPfLN.1", + HG_(free), lock_unique_cmp ); + tl_assert(map_LockN_to_P); + } + if (!VG_(lookupFM)( map_LockN_to_P, NULL, (Word*)&lkp, (Word)lkn)) { + lkp = HG_(zalloc)( "hg.mLPfLN.2", sizeof(Lock) ); + *lkp = *lkn; + lkp->admin = NULL; + lkp->magic = LockP_MAGIC; + /* Forget about the bag of lock holders - don't copy that. + Also, acquired_at should be NULL whenever heldBy is, and vice + versa. Also forget about the associated libhb synch object. */ + lkp->heldW = False; + lkp->heldBy = NULL; + lkp->acquired_at = NULL; + lkp->hbso = NULL; + VG_(addToFM)( map_LockN_to_P, (Word)lkp, (Word)lkp ); + } + tl_assert( HG_(is_sane_LockP)(lkp) ); + return lkp; +} + +/* Errors: + + race: program counter + read or write + data size + previous state + current state + + FIXME: how does state printing interact with lockset gc? + Are the locksets in prev/curr state always valid? + Ditto question for the threadsets + ThreadSets - probably are always valid if Threads + are never thrown away. + LockSets - could at least print the lockset elements that + correspond to actual locks at the time of printing. Hmm. 
+*/ + +/* Error kinds */ +typedef + enum { + XE_Race=1101, // race + XE_FreeMemLock, // freeing memory containing a locked lock + XE_UnlockUnlocked, // unlocking a not-locked lock + XE_UnlockForeign, // unlocking a lock held by some other thread + XE_UnlockBogus, // unlocking an address not known to be a lock + XE_PthAPIerror, // error from the POSIX pthreads API + XE_LockOrder, // lock order error + XE_Misc // misc other error (w/ string to describe it) + } + XErrorTag; + +/* Extra contexts for kinds */ +typedef + struct { + XErrorTag tag; + union { + struct { + Addr data_addr; + Int szB; + Bool isWrite; + ExeContext* mb_lastlock; + ExeContext* mb_confacc; + Thread* thr; + Thread* mb_confaccthr; + Int mb_confaccSzB; + Bool mb_confaccIsW; + Char descr1[96]; + Char descr2[96]; + } Race; + struct { + Thread* thr; /* doing the freeing */ + Lock* lock; /* lock which is locked */ + } FreeMemLock; + struct { + Thread* thr; /* doing the unlocking */ + Lock* lock; /* lock (that is already unlocked) */ + } UnlockUnlocked; + struct { + Thread* thr; /* doing the unlocking */ + Thread* owner; /* thread that actually holds the lock */ + Lock* lock; /* lock (that is held by 'owner') */ + } UnlockForeign; + struct { + Thread* thr; /* doing the unlocking */ + Addr lock_ga; /* purported address of the lock */ + } UnlockBogus; + struct { + Thread* thr; + HChar* fnname; /* persistent, in tool-arena */ + Word err; /* pth error code */ + HChar* errstr; /* persistent, in tool-arena */ + } PthAPIerror; + struct { + Thread* thr; + Addr before_ga; /* always locked first in prog. history */ + Addr after_ga; + ExeContext* before_ec; + ExeContext* after_ec; + } LockOrder; + struct { + Thread* thr; + HChar* errstr; /* persistent, in tool-arena */ + } Misc; + } XE; + } + XError; + +static void init_XError ( XError* xe ) { + VG_(memset)(xe, 0, sizeof(*xe) ); + xe->tag = XE_Race-1; /* bogus */ +} + + +/* Extensions of suppressions */ +typedef + enum { + XS_Race=1201, /* race */ + XS_FreeMemLock, + XS_UnlockUnlocked, + XS_UnlockForeign, + XS_UnlockBogus, + XS_PthAPIerror, + XS_LockOrder, + XS_Misc + } + XSuppTag; + + +/* Updates the copy with address info if necessary. */ +UInt HG_(update_extra) ( Error* err ) +{ + XError* xe = (XError*)VG_(get_error_extra)(err); + tl_assert(xe); + //if (extra != NULL && Undescribed == extra->addrinfo.akind) { + // describe_addr ( VG_(get_error_address)(err), &(extra->addrinfo) ); + //} + + if (xe->tag == XE_Race) { + /* See if we can come up with a source level description of the + raced-upon address. This is potentially expensive, which is + why it's only done at the update_extra point, not when the + error is initially created. 
*/ + static Int xxx = 0; + xxx++; + if (0) + VG_(printf)("HG_(update_extra): " + "%d conflicting-event queries\n", xxx); + tl_assert(sizeof(xe->XE.Race.descr1) == sizeof(xe->XE.Race.descr2)); + if (VG_(get_data_description)( + &xe->XE.Race.descr1[0], + &xe->XE.Race.descr2[0], + sizeof(xe->XE.Race.descr1)-1, + xe->XE.Race.data_addr )) { + tl_assert( xe->XE.Race.descr1 + [ sizeof(xe->XE.Race.descr1)-1 ] == 0); + tl_assert( xe->XE.Race.descr2 + [ sizeof(xe->XE.Race.descr2)-1 ] == 0); + } + { Thr* thrp = NULL; + ExeContext* wherep = NULL; + Addr acc_addr = xe->XE.Race.data_addr; + Int acc_szB = xe->XE.Race.szB; + Thr* acc_thr = xe->XE.Race.thr->hbthr; + Bool acc_isW = xe->XE.Race.isWrite; + SizeT conf_szB = 0; + Bool conf_isW = False; + tl_assert(!xe->XE.Race.mb_confacc); + tl_assert(!xe->XE.Race.mb_confaccthr); + if (libhb_event_map_lookup( + &wherep, &thrp, &conf_szB, &conf_isW, + acc_thr, acc_addr, acc_szB, acc_isW )) { + Thread* threadp; + tl_assert(wherep); + tl_assert(thrp); + threadp = libhb_get_Thr_opaque( thrp ); + tl_assert(threadp); + xe->XE.Race.mb_confacc = wherep; + xe->XE.Race.mb_confaccthr = threadp; + xe->XE.Race.mb_confaccSzB = (Int)conf_szB; + xe->XE.Race.mb_confaccIsW = conf_isW; + } + } + } + + return sizeof(XError); +} + +void HG_(record_error_Race) ( Thread* thr, + Addr data_addr, Int szB, Bool isWrite, + ExeContext* mb_lastlock ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + +# if defined(VGO_linux) + /* Skip any races on locations apparently in GOTPLT sections. This + is said to be caused by ld.so poking PLT table entries (or + whatever) when it writes the resolved address of a dynamically + linked routine, into the table (or whatever) when it is called + for the first time. */ + { + VgSectKind sect = VG_(seginfo_sect_kind)( NULL, 0, data_addr ); + if (0) VG_(printf)("XXXXXXXXX RACE on %#lx %s\n", + data_addr, VG_(pp_SectKind)(sect)); + /* SectPLT is required on ???-linux */ + if (sect == Vg_SectGOTPLT) return; + /* SectPLT is required on ppc32/64-linux */ + if (sect == Vg_SectPLT) return; + } +# endif + + init_XError(&xe); + xe.tag = XE_Race; + xe.XE.Race.data_addr = data_addr; + xe.XE.Race.szB = szB; + xe.XE.Race.isWrite = isWrite; + xe.XE.Race.mb_lastlock = mb_lastlock; + xe.XE.Race.thr = thr; + tl_assert(isWrite == False || isWrite == True); + tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + xe.XE.Race.descr1[0] = xe.XE.Race.descr2[0] = 0; + // FIXME: tid vs thr + // Skip on any of the conflicting-access info at this point. + // It's expensive to obtain, and this error is more likely than + // not to be discarded. We'll fill these fields in in + // HG_(update_extra) just above, assuming the error ever makes + // it that far (unlikely). 
+ xe.XE.Race.mb_confaccSzB = 0; + xe.XE.Race.mb_confaccIsW = False; + xe.XE.Race.mb_confacc = NULL; + xe.XE.Race.mb_confaccthr = NULL; + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_Race, data_addr, NULL, &xe ); +} + +void HG_(record_error_FreeMemLock) ( Thread* thr, Lock* lk ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + tl_assert( HG_(is_sane_LockN)(lk) ); + init_XError(&xe); + xe.tag = XE_FreeMemLock; + xe.XE.FreeMemLock.thr = thr; + xe.XE.FreeMemLock.lock = mk_LockP_from_LockN(lk); + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_FreeMemLock, 0, NULL, &xe ); +} + +void HG_(record_error_UnlockUnlocked) ( Thread* thr, Lock* lk ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + tl_assert( HG_(is_sane_LockN)(lk) ); + init_XError(&xe); + xe.tag = XE_UnlockUnlocked; + xe.XE.UnlockUnlocked.thr = thr; + xe.XE.UnlockUnlocked.lock = mk_LockP_from_LockN(lk); + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_UnlockUnlocked, 0, NULL, &xe ); +} + +void HG_(record_error_UnlockForeign) ( Thread* thr, + Thread* owner, Lock* lk ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + tl_assert( HG_(is_sane_Thread)(owner) ); + tl_assert( HG_(is_sane_LockN)(lk) ); + init_XError(&xe); + xe.tag = XE_UnlockForeign; + xe.XE.UnlockForeign.thr = thr; + xe.XE.UnlockForeign.owner = owner; + xe.XE.UnlockForeign.lock = mk_LockP_from_LockN(lk); + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_UnlockForeign, 0, NULL, &xe ); +} + +void HG_(record_error_UnlockBogus) ( Thread* thr, Addr lock_ga ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + init_XError(&xe); + xe.tag = XE_UnlockBogus; + xe.XE.UnlockBogus.thr = thr; + xe.XE.UnlockBogus.lock_ga = lock_ga; + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_UnlockBogus, 0, NULL, &xe ); +} + +void HG_(record_error_LockOrder)( + Thread* thr, Addr before_ga, Addr after_ga, + ExeContext* before_ec, ExeContext* after_ec + ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + if (!HG_(clo_track_lockorders)) + return; + init_XError(&xe); + xe.tag = XE_LockOrder; + xe.XE.LockOrder.thr = thr; + xe.XE.LockOrder.before_ga = before_ga; + xe.XE.LockOrder.before_ec = before_ec; + xe.XE.LockOrder.after_ga = after_ga; + xe.XE.LockOrder.after_ec = after_ec; + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_LockOrder, 0, NULL, &xe ); +} + +void HG_(record_error_PthAPIerror) ( Thread* thr, HChar* fnname, + Word err, HChar* errstr ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + tl_assert(fnname); + tl_assert(errstr); + init_XError(&xe); + xe.tag = XE_PthAPIerror; + xe.XE.PthAPIerror.thr = thr; + xe.XE.PthAPIerror.fnname = string_table_strdup(fnname); + xe.XE.PthAPIerror.err = err; + xe.XE.PthAPIerror.errstr = string_table_strdup(errstr); + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( 
thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_PthAPIerror, 0, NULL, &xe ); +} + +void HG_(record_error_Misc) ( Thread* thr, HChar* errstr ) +{ + XError xe; + tl_assert( HG_(is_sane_Thread)(thr) ); + tl_assert(errstr); + init_XError(&xe); + xe.tag = XE_Misc; + xe.XE.Misc.thr = thr; + xe.XE.Misc.errstr = string_table_strdup(errstr); + // FIXME: tid vs thr + tl_assert( HG_(is_sane_ThreadId)(thr->coretid) ); + tl_assert( thr->coretid != VG_INVALID_THREADID ); + VG_(maybe_record_error)( thr->coretid, + XE_Misc, 0, NULL, &xe ); +} + +Bool HG_(eq_Error) ( VgRes not_used, Error* e1, Error* e2 ) +{ + XError *xe1, *xe2; + + tl_assert(VG_(get_error_kind)(e1) == VG_(get_error_kind)(e2)); + + xe1 = (XError*)VG_(get_error_extra)(e1); + xe2 = (XError*)VG_(get_error_extra)(e2); + tl_assert(xe1); + tl_assert(xe2); + + switch (VG_(get_error_kind)(e1)) { + case XE_Race: + return xe1->XE.Race.szB == xe2->XE.Race.szB + && xe1->XE.Race.isWrite == xe2->XE.Race.isWrite + && (HG_(clo_cmp_race_err_addrs) + ? xe1->XE.Race.data_addr == xe2->XE.Race.data_addr + : True); + case XE_FreeMemLock: + return xe1->XE.FreeMemLock.thr == xe2->XE.FreeMemLock.thr + && xe1->XE.FreeMemLock.lock == xe2->XE.FreeMemLock.lock; + case XE_UnlockUnlocked: + return xe1->XE.UnlockUnlocked.thr == xe2->XE.UnlockUnlocked.thr + && xe1->XE.UnlockUnlocked.lock == xe2->XE.UnlockUnlocked.lock; + case XE_UnlockForeign: + return xe1->XE.UnlockForeign.thr == xe2->XE.UnlockForeign.thr + && xe1->XE.UnlockForeign.owner == xe2->XE.UnlockForeign.owner + && xe1->XE.UnlockForeign.lock == xe2->XE.UnlockForeign.lock; + case XE_UnlockBogus: + return xe1->XE.UnlockBogus.thr == xe2->XE.UnlockBogus.thr + && xe1->XE.UnlockBogus.lock_ga == xe2->XE.UnlockBogus.lock_ga; + case XE_PthAPIerror: + return xe1->XE.PthAPIerror.thr == xe2->XE.PthAPIerror.thr + && 0==VG_(strcmp)(xe1->XE.PthAPIerror.fnname, + xe2->XE.PthAPIerror.fnname) + && xe1->XE.PthAPIerror.err == xe2->XE.PthAPIerror.err; + case XE_LockOrder: + return xe1->XE.LockOrder.thr == xe2->XE.LockOrder.thr; + case XE_Misc: + return xe1->XE.Misc.thr == xe2->XE.Misc.thr + && 0==VG_(strcmp)(xe1->XE.Misc.errstr, xe2->XE.Misc.errstr); + default: + tl_assert(0); + } + + /*NOTREACHED*/ + tl_assert(0); +} + + +/* Announce (that is, print the point-of-creation) of 'thr'. Only do + this once, as we only want to see these announcements once per + thread. 
*/ +static void announce_one_thread ( Thread* thr ) +{ + tl_assert(HG_(is_sane_Thread)(thr)); + tl_assert(thr->errmsg_index >= 1); + if (!thr->announced) { + if (thr->errmsg_index == 1) { + tl_assert(thr->created_at == NULL); + VG_(message)(Vg_UserMsg, "Thread #%d is the program's root thread", + thr->errmsg_index); + } else { + tl_assert(thr->created_at != NULL); + VG_(message)(Vg_UserMsg, "Thread #%d was created", + thr->errmsg_index); + VG_(pp_ExeContext)( thr->created_at ); + } + VG_(message)(Vg_UserMsg, ""); + thr->announced = True; + } +} + + +void HG_(pp_Error) ( Error* err ) +{ + XError *xe = (XError*)VG_(get_error_extra)(err); + + switch (VG_(get_error_kind)(err)) { + + case XE_Misc: { + tl_assert(xe); + tl_assert( HG_(is_sane_Thread)( xe->XE.Misc.thr ) ); + announce_one_thread( xe->XE.Misc.thr ); + VG_(message)(Vg_UserMsg, + "Thread #%d: %s", + (Int)xe->XE.Misc.thr->errmsg_index, + xe->XE.Misc.errstr); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + break; + } + + case XE_LockOrder: { + tl_assert(xe); + tl_assert( HG_(is_sane_Thread)( xe->XE.LockOrder.thr ) ); + announce_one_thread( xe->XE.LockOrder.thr ); + VG_(message)(Vg_UserMsg, + "Thread #%d: lock order \"%p before %p\" violated", + (Int)xe->XE.LockOrder.thr->errmsg_index, + (void*)xe->XE.LockOrder.before_ga, + (void*)xe->XE.LockOrder.after_ga); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + if (xe->XE.LockOrder.before_ec && xe->XE.LockOrder.after_ec) { + VG_(message)(Vg_UserMsg, + " Required order was established by acquisition of lock at %p", + (void*)xe->XE.LockOrder.before_ga); + VG_(pp_ExeContext)( xe->XE.LockOrder.before_ec ); + VG_(message)(Vg_UserMsg, + " followed by a later acquisition of lock at %p", + (void*)xe->XE.LockOrder.after_ga); + VG_(pp_ExeContext)( xe->XE.LockOrder.after_ec ); + } + break; + } + + case XE_PthAPIerror: { + tl_assert(xe); + tl_assert( HG_(is_sane_Thread)( xe->XE.PthAPIerror.thr ) ); + announce_one_thread( xe->XE.PthAPIerror.thr ); + VG_(message)(Vg_UserMsg, + "Thread #%d's call to %s failed", + (Int)xe->XE.PthAPIerror.thr->errmsg_index, + xe->XE.PthAPIerror.fnname); + VG_(message)(Vg_UserMsg, + " with error code %ld (%s)", + xe->XE.PthAPIerror.err, + xe->XE.PthAPIerror.errstr); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + break; + } + + case XE_UnlockBogus: { + tl_assert(xe); + tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockBogus.thr ) ); + announce_one_thread( xe->XE.UnlockBogus.thr ); + VG_(message)(Vg_UserMsg, + "Thread #%d unlocked an invalid lock at %p ", + (Int)xe->XE.UnlockBogus.thr->errmsg_index, + (void*)xe->XE.UnlockBogus.lock_ga); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + break; + } + + case XE_UnlockForeign: { + tl_assert(xe); + tl_assert( HG_(is_sane_LockP)( xe->XE.UnlockForeign.lock ) ); + tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockForeign.owner ) ); + tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockForeign.thr ) ); + announce_one_thread( xe->XE.UnlockForeign.thr ); + announce_one_thread( xe->XE.UnlockForeign.owner ); + VG_(message)(Vg_UserMsg, + "Thread #%d unlocked lock at %p " + "currently held by thread #%d", + (Int)xe->XE.UnlockForeign.thr->errmsg_index, + (void*)xe->XE.UnlockForeign.lock->guestaddr, + (Int)xe->XE.UnlockForeign.owner->errmsg_index ); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + if (xe->XE.UnlockForeign.lock->appeared_at) { + VG_(message)(Vg_UserMsg, + " Lock at %p was first observed", + (void*)xe->XE.UnlockForeign.lock->guestaddr); + VG_(pp_ExeContext)( xe->XE.UnlockForeign.lock->appeared_at ); + } + break; + } + + case 
XE_UnlockUnlocked: { + tl_assert(xe); + tl_assert( HG_(is_sane_LockP)( xe->XE.UnlockUnlocked.lock ) ); + tl_assert( HG_(is_sane_Thread)( xe->XE.UnlockUnlocked.thr ) ); + announce_one_thread( xe->XE.UnlockUnlocked.thr ); + VG_(message)(Vg_UserMsg, + "Thread #%d unlocked a not-locked lock at %p ", + (Int)xe->XE.UnlockUnlocked.thr->errmsg_index, + (void*)xe->XE.UnlockUnlocked.lock->guestaddr); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + if (xe->XE.UnlockUnlocked.lock->appeared_at) { + VG_(message)(Vg_UserMsg, + " Lock at %p was first observed", + (void*)xe->XE.UnlockUnlocked.lock->guestaddr); + VG_(pp_ExeContext)( xe->XE.UnlockUnlocked.lock->appeared_at ); + } + break; + } + + case XE_FreeMemLock: { + tl_assert(xe); + tl_assert( HG_(is_sane_LockP)( xe->XE.FreeMemLock.lock ) ); + tl_assert( HG_(is_sane_Thread)( xe->XE.FreeMemLock.thr ) ); + announce_one_thread( xe->XE.FreeMemLock.thr ); + VG_(message)(Vg_UserMsg, + "Thread #%d deallocated location %p " + "containing a locked lock", + (Int)xe->XE.FreeMemLock.thr->errmsg_index, + (void*)xe->XE.FreeMemLock.lock->guestaddr); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + if (xe->XE.FreeMemLock.lock->appeared_at) { + VG_(message)(Vg_UserMsg, + " Lock at %p was first observed", + (void*)xe->XE.FreeMemLock.lock->guestaddr); + VG_(pp_ExeContext)( xe->XE.FreeMemLock.lock->appeared_at ); + } + break; + } + + case XE_Race: { + Addr err_ga; + HChar* what; + Int szB; + what = xe->XE.Race.isWrite ? "write" : "read"; + szB = xe->XE.Race.szB; + err_ga = VG_(get_error_address)(err); + + announce_one_thread( xe->XE.Race.thr ); + if (xe->XE.Race.mb_confaccthr) + announce_one_thread( xe->XE.Race.mb_confaccthr ); + VG_(message)(Vg_UserMsg, + "Possible data race during %s of size %d at %#lx by thread #%d", + what, szB, err_ga, (Int)xe->XE.Race.thr->errmsg_index + ); + VG_(pp_ExeContext)( VG_(get_error_where)(err) ); + if (xe->XE.Race.mb_confacc) { + if (xe->XE.Race.mb_confaccthr) { + VG_(message)(Vg_UserMsg, + " This conflicts with a previous %s of size %d by thread #%d", + xe->XE.Race.mb_confaccIsW ? "write" : "read", + xe->XE.Race.mb_confaccSzB, + xe->XE.Race.mb_confaccthr->errmsg_index + ); + } else { + // FIXME: can this ever happen? + VG_(message)(Vg_UserMsg, + " This conflicts with a previous %s of size %d", + xe->XE.Race.mb_confaccIsW ? "write" : "read", + xe->XE.Race.mb_confaccSzB + ); + } + VG_(pp_ExeContext)( xe->XE.Race.mb_confacc ); + } + + + /* If we have a better description of the address, show it. 
*/ + if (xe->XE.Race.descr1[0] != 0) + VG_(message)(Vg_UserMsg, " %s", &xe->XE.Race.descr1[0]); + if (xe->XE.Race.descr2[0] != 0) + VG_(message)(Vg_UserMsg, " %s", &xe->XE.Race.descr2[0]); + + break; /* case XE_Race */ + } /* case XE_Race */ + + default: + tl_assert(0); + } /* switch (VG_(get_error_kind)(err)) */ +} + +Char* HG_(get_error_name) ( Error* err ) +{ + switch (VG_(get_error_kind)(err)) { + case XE_Race: return "Race"; + case XE_FreeMemLock: return "FreeMemLock"; + case XE_UnlockUnlocked: return "UnlockUnlocked"; + case XE_UnlockForeign: return "UnlockForeign"; + case XE_UnlockBogus: return "UnlockBogus"; + case XE_PthAPIerror: return "PthAPIerror"; + case XE_LockOrder: return "LockOrder"; + case XE_Misc: return "Misc"; + default: tl_assert(0); /* fill in missing case */ + } +} + +Bool HG_(recognised_suppression) ( Char* name, Supp *su ) +{ +# define TRY(_name,_xskind) \ + if (0 == VG_(strcmp)(name, (_name))) { \ + VG_(set_supp_kind)(su, (_xskind)); \ + return True; \ + } + TRY("Race", XS_Race); + TRY("FreeMemLock", XS_FreeMemLock); + TRY("UnlockUnlocked", XS_UnlockUnlocked); + TRY("UnlockForeign", XS_UnlockForeign); + TRY("UnlockBogus", XS_UnlockBogus); + TRY("PthAPIerror", XS_PthAPIerror); + TRY("LockOrder", XS_LockOrder); + TRY("Misc", XS_Misc); + return False; +# undef TRY +} + +Bool HG_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf, + Supp* su ) +{ + /* do nothing -- no extra suppression info present. Return True to + indicate nothing bad happened. */ + return True; +} + +Bool HG_(error_matches_suppression) ( Error* err, Supp* su ) +{ + switch (VG_(get_supp_kind)(su)) { + case XS_Race: return VG_(get_error_kind)(err) == XE_Race; + case XS_FreeMemLock: return VG_(get_error_kind)(err) == XE_FreeMemLock; + case XS_UnlockUnlocked: return VG_(get_error_kind)(err) == XE_UnlockUnlocked; + case XS_UnlockForeign: return VG_(get_error_kind)(err) == XE_UnlockForeign; + case XS_UnlockBogus: return VG_(get_error_kind)(err) == XE_UnlockBogus; + case XS_PthAPIerror: return VG_(get_error_kind)(err) == XE_PthAPIerror; + case XS_LockOrder: return VG_(get_error_kind)(err) == XE_LockOrder; + case XS_Misc: return VG_(get_error_kind)(err) == XE_Misc; + //case XS_: return VG_(get_error_kind)(err) == XE_; + default: tl_assert(0); /* fill in missing cases */ + } +} + +void HG_(print_extra_suppression_info) ( Error* err ) +{ + /* Do nothing */ +} + + +/*--------------------------------------------------------------------*/ +/*--- end hg_errors.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_errors.h.svn-base b/helgrind/.svn/text-base/hg_errors.h.svn-base new file mode 100644 index 0000000..a45173a --- /dev/null +++ b/helgrind/.svn/text-base/hg_errors.h.svn-base @@ -0,0 +1,71 @@ + +/*--------------------------------------------------------------------*/ +/*--- Error management for Helgrind. ---*/ +/*--- hg_errors.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __HG_ERRORS_H +#define __HG_ERRORS_H + + +/* The standard bundle of error management functions that we are +required to present to the core/tool interface at startup. */ +Bool HG_(eq_Error) ( VgRes not_used, Error* e1, Error* e2 ); +void HG_(pp_Error) ( Error* err ); +UInt HG_(update_extra) ( Error* err ); +Bool HG_(recognised_suppression) ( Char* name, Supp *su ); +Bool HG_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf, + Supp* su ); +Bool HG_(error_matches_suppression) ( Error* err, Supp* su ); +Char* HG_(get_error_name) ( Error* err ); +void HG_(print_extra_suppression_info) ( Error* err ); + +/* Functions for recording various kinds of errors. */ +void HG_(record_error_Race) ( Thread* thr, + Addr data_addr, Int szB, Bool isWrite, + ExeContext* mb_lastlock ); +void HG_(record_error_FreeMemLock) ( Thread* thr, Lock* lk ); +void HG_(record_error_UnlockUnlocked) ( Thread*, Lock* ); +void HG_(record_error_UnlockForeign) ( Thread*, Thread*, Lock* ); +void HG_(record_error_UnlockBogus) ( Thread*, Addr ); +void HG_(record_error_PthAPIerror) ( Thread*, HChar*, Word, HChar* ); +void HG_(record_error_LockOrder) ( Thread*, Addr, Addr, + ExeContext*, ExeContext* ); +void HG_(record_error_Misc) ( Thread*, HChar* ); + +/* Statistics pertaining to error management. */ +extern ULong HG_(stats__LockN_to_P_queries); +extern ULong HG_(stats__LockN_to_P_get_map_size) ( void ); +extern ULong HG_(stats__string_table_queries); +extern ULong HG_(stats__string_table_get_map_size) ( void ); + +#endif /* ! __HG_ERRORS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end hg_errors.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_intercepts.c.svn-base b/helgrind/.svn/text-base/hg_intercepts.c.svn-base new file mode 100644 index 0000000..c2ea907 --- /dev/null +++ b/helgrind/.svn/text-base/hg_intercepts.c.svn-base @@ -0,0 +1,1734 @@ + +/*--------------------------------------------------------------------*/ +/*--- pthread intercepts for thread checking. ---*/ +/*--- tc_intercepts.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +/* RUNS ON SIMULATED CPU + Interceptors for pthread_* functions, so that tc_main can see + significant thread events. + + Important: when adding a function wrapper to this file, remember to + add a test case to tc20_verifywrap.c. A common cause of failure is + for wrappers to not engage on different distros, and + tc20_verifywrap essentially checks that each wrapper is really + doing something. +*/ + +#include "pub_tool_basics.h" +#include "pub_tool_redir.h" +#include "valgrind.h" +#include "helgrind.h" + +#define TRACE_PTH_FNS 0 +#define TRACE_QT4_FNS 0 + + +/*----------------------------------------------------------------*/ +/*--- ---*/ +/*----------------------------------------------------------------*/ + +#define PTH_FUNC(ret_ty, f, args...) \ + ret_ty I_WRAP_SONAME_FNNAME_ZZ(libpthreadZdsoZd0,f)(args); \ + ret_ty I_WRAP_SONAME_FNNAME_ZZ(libpthreadZdsoZd0,f)(args) + +// Do a client request. This is a macro rather than a function +// so as to avoid having an extra function in the stack trace. + +#define DO_CREQ_v_W(_creqF, _ty1F,_arg1F) \ + do { \ + Word _unused_res, _arg1; \ + assert(sizeof(_ty1F) == sizeof(Word)); \ + _arg1 = (Word)(_arg1F); \ + VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0, \ + (_creqF), \ + _arg1, 0,0,0,0); \ + } while (0) + +#define DO_CREQ_v_WW(_creqF, _ty1F,_arg1F, _ty2F,_arg2F) \ + do { \ + Word _unused_res, _arg1, _arg2; \ + assert(sizeof(_ty1F) == sizeof(Word)); \ + assert(sizeof(_ty2F) == sizeof(Word)); \ + _arg1 = (Word)(_arg1F); \ + _arg2 = (Word)(_arg2F); \ + VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0, \ + (_creqF), \ + _arg1,_arg2,0,0,0); \ + } while (0) + +#define DO_CREQ_W_WW(_resF, _creqF, _ty1F,_arg1F, _ty2F,_arg2F) \ + do { \ + Word _res, _arg1, _arg2; \ + assert(sizeof(_ty1F) == sizeof(Word)); \ + assert(sizeof(_ty2F) == sizeof(Word)); \ + _arg1 = (Word)(_arg1F); \ + _arg2 = (Word)(_arg2F); \ + VALGRIND_DO_CLIENT_REQUEST(_res, 2, \ + (_creqF), \ + _arg1,_arg2,0,0,0); \ + _resF = _res; \ + } while (0) + +#define DO_CREQ_v_WWW(_creqF, _ty1F,_arg1F, \ + _ty2F,_arg2F, _ty3F, _arg3F) \ + do { \ + Word _unused_res, _arg1, _arg2, _arg3; \ + assert(sizeof(_ty1F) == sizeof(Word)); \ + assert(sizeof(_ty2F) == sizeof(Word)); \ + assert(sizeof(_ty3F) == sizeof(Word)); \ + _arg1 = (Word)(_arg1F); \ + _arg2 = (Word)(_arg2F); \ + _arg3 = (Word)(_arg3F); \ + VALGRIND_DO_CLIENT_REQUEST(_unused_res, 0, \ + (_creqF), \ + _arg1,_arg2,_arg3,0,0); \ + } while (0) + + +#define DO_PthAPIerror(_fnnameF, _errF) \ + do { \ + char* _fnname = (char*)(_fnnameF); \ + long _err = (long)(int)(_errF); \ + char* _errstr = lame_strerror(_err); \ + DO_CREQ_v_WWW(_VG_USERREQ__HG_PTH_API_ERROR, \ + char*,_fnname, \ + long,_err, char*,_errstr); \ + } while (0) + + +/* Needed for older glibcs (2.3 and older, at least) who don't + otherwise "know" about pthread_rwlock_anything or about + PTHREAD_MUTEX_RECURSIVE (amongst things). 
*/ +#define _GNU_SOURCE 1 + +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <pthread.h> + + +/* A lame version of strerror which doesn't use the real libc + strerror_r, since using the latter just generates endless more + threading errors (glibc goes off and does tons of crap w.r.t. + locales etc) */ +static char* lame_strerror ( long err ) +{ switch (err) { + case EPERM: return "EPERM: Operation not permitted"; + case ENOENT: return "ENOENT: No such file or directory"; + case ESRCH: return "ESRCH: No such process"; + case EINTR: return "EINTR: Interrupted system call"; + case EBADF: return "EBADF: Bad file number"; + case EAGAIN: return "EAGAIN: Try again"; + case ENOMEM: return "ENOMEM: Out of memory"; + case EACCES: return "EACCES: Permission denied"; + case EFAULT: return "EFAULT: Bad address"; + case EEXIST: return "EEXIST: File exists"; + case EINVAL: return "EINVAL: Invalid argument"; + case EMFILE: return "EMFILE: Too many open files"; + case ENOSYS: return "ENOSYS: Function not implemented"; + case EOVERFLOW: return "EOVERFLOW: Value too large " + "for defined data type"; + case EBUSY: return "EBUSY: Device or resource busy"; + case ETIMEDOUT: return "ETIMEDOUT: Connection timed out"; + case EDEADLK: return "EDEADLK: Resource deadlock would occur"; + case EOPNOTSUPP: return "EOPNOTSUPP: Operation not supported on " + "transport endpoint"; /* honest, guv */ + default: return "tc_intercepts.c: lame_strerror(): " + "unhandled case -- please fix me!"; + } +} + + +/*----------------------------------------------------------------*/ +/*--- pthread_create, pthread_join, pthread_exit ---*/ +/*----------------------------------------------------------------*/ + +/* Do not rename this function. It contains an unavoidable race and + so is mentioned by name in glibc-*helgrind*.supp. */ +static void* mythread_wrapper ( void* xargsV ) +{ + volatile Word* xargs = (volatile Word*) xargsV; + void*(*fn)(void*) = (void*(*)(void*))xargs[0]; + void* arg = (void*)xargs[1]; + pthread_t me = pthread_self(); + /* Tell the tool what my pthread_t is. */ + DO_CREQ_v_W(_VG_USERREQ__HG_SET_MY_PTHREAD_T, pthread_t,me); + /* allow the parent to proceed. We can't let it proceed until + we're ready because (1) we need to make sure it doesn't exit and + hence deallocate xargs[] while we still need it, and (2) we + don't want either parent nor child to proceed until the tool has + been notified of the child's pthread_t. */ + xargs[2] = 0; + /* Now we can no longer safely use xargs[]. */ + return (void*) fn( (void*)arg ); +} + +// pthread_create +PTH_FUNC(int, pthreadZucreateZAZa, // pthread_create@* + pthread_t *thread, const pthread_attr_t *attr, + void *(*start) (void *), void *arg) +{ + int ret; + OrigFn fn; + volatile Word xargs[3]; + + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_create wrapper"); fflush(stderr); + } + xargs[0] = (Word)start; + xargs[1] = (Word)arg; + xargs[2] = 1; /* serves as a spinlock -- sigh */ + + CALL_FN_W_WWWW(ret, fn, thread,attr,mythread_wrapper,&xargs[0]); + + if (ret == 0) { + /* we have to wait for the child to notify the tool of its + pthread_t before continuing */ + while (xargs[2] != 0) { + /* Do nothing. We need to spin until the child writes to + xargs[2]. However, that can lead to starvation in the + child and very long delays (eg, tc19_shadowmem on + ppc64-linux Fedora Core 6). So yield the cpu if we can, + to let the child run at the earliest available + opportunity. 
*/ + sched_yield(); + } + } else { + DO_PthAPIerror( "pthread_create", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: pth_create -> %d >>\n", ret); + } + return ret; +} + +// pthread_join +PTH_FUNC(int, pthreadZujoin, // pthread_join + pthread_t thread, void** value_pointer) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_join wrapper"); fflush(stderr); + } + + CALL_FN_W_WW(ret, fn, thread,value_pointer); + + /* At least with NPTL as the thread library, this is safe because + it is guaranteed (by NPTL) that the quitter will be completely gone + before pthread_join (the original) returns. See email below.*/ + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_JOIN_POST, pthread_t,thread); + } else { + DO_PthAPIerror( "pthread_join", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: pth_join -> %d >>\n", ret); + } + return ret; +} + +/* Behaviour of pthread_join on NPTL: + +Me: +I have a question re the NPTL pthread_join implementation. + + Suppose I am the thread 'stayer'. + + If I call pthread_join(quitter), is it guaranteed that the + thread 'quitter' has really exited before pthread_join returns? + + IOW, is it guaranteed that 'quitter' will not execute any further + instructions after pthread_join returns? + +I believe this is true based on the following analysis of +glibc-2.5 sources. However am not 100% sure and would appreciate +confirmation. + + 'quitter' will be running start_thread() in nptl/pthread_create.c + + The last action of start_thread() is to exit via + __exit_thread_inline(0), which simply does sys_exit + (nptl/pthread_create.c:403) + + 'stayer' meanwhile is waiting for lll_wait_tid (pd->tid) + (call at nptl/pthread_join.c:89) + + As per comment at nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h:536, + lll_wait_tid will not return until kernel notifies via futex + wakeup that 'quitter' has terminated. + + Hence pthread_join cannot return until 'quitter' really has + completely disappeared. + +Drepper: +> As per comment at nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h:536, +> lll_wait_tid will not return until kernel notifies via futex +> wakeup that 'quitter' has terminated. +That's the key. The kernel resets the TID field after the thread is +done. No way the joiner can return before the thread is gone. 
+*/ + + +/*----------------------------------------------------------------*/ +/*--- pthread_mutex_t functions ---*/ +/*----------------------------------------------------------------*/ + +/* Handled: pthread_mutex_init pthread_mutex_destroy + pthread_mutex_lock + pthread_mutex_trylock pthread_mutex_timedlock + pthread_mutex_unlock + + Unhandled: pthread_spin_init pthread_spin_destroy + pthread_spin_lock + pthread_spin_trylock + pthread_spin_unlock +*/ + +// pthread_mutex_init +PTH_FUNC(int, pthreadZumutexZuinit, // pthread_mutex_init + pthread_mutex_t *mutex, + pthread_mutexattr_t* attr) +{ + int ret; + long mbRec; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_mxinit %p", mutex); fflush(stderr); + } + + mbRec = 0; + if (attr) { + int ty, zzz; + zzz = pthread_mutexattr_gettype(attr, &ty); + if (zzz == 0 && ty == PTHREAD_MUTEX_RECURSIVE) + mbRec = 1; + } + + CALL_FN_W_WW(ret, fn, mutex,attr); + + if (ret == 0 /*success*/) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST, + pthread_mutex_t*,mutex, long,mbRec); + } else { + DO_PthAPIerror( "pthread_mutex_init", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: mxinit -> %d >>\n", ret); + } + return ret; +} + + +// pthread_mutex_destroy +PTH_FUNC(int, pthreadZumutexZudestroy, // pthread_mutex_destroy + pthread_mutex_t *mutex) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_mxdestroy %p", mutex); fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE, + pthread_mutex_t*,mutex); + + CALL_FN_W_W(ret, fn, mutex); + + if (ret != 0) { + DO_PthAPIerror( "pthread_mutex_destroy", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: mxdestroy -> %d >>\n", ret); + } + return ret; +} + + +// pthread_mutex_lock +PTH_FUNC(int, pthreadZumutexZulock, // pthread_mutex_lock + pthread_mutex_t *mutex) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_mxlock %p", mutex); fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, + pthread_mutex_t*,mutex, long,0/*!isTryLock*/); + + CALL_FN_W_W(ret, fn, mutex); + + /* There's a hole here: libpthread now knows the lock is locked, + but the tool doesn't, so some other thread could run and detect + that the lock has been acquired by someone (this thread). Does + this matter? Not sure, but I don't think so. */ + + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + pthread_mutex_t*,mutex); + } else { + DO_PthAPIerror( "pthread_mutex_lock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: mxlock -> %d >>\n", ret); + } + return ret; +} + + +// pthread_mutex_trylock. The handling needed here is very similar +// to that for pthread_mutex_lock, except that we need to tell +// the pre-lock creq that this is a trylock-style operation, and +// therefore not to complain if the lock is nonrecursive and +// already locked by this thread -- because then it'll just fail +// immediately with EBUSY. 
+PTH_FUNC(int, pthreadZumutexZutrylock, // pthread_mutex_trylock + pthread_mutex_t *mutex) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_mxtrylock %p", mutex); fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, + pthread_mutex_t*,mutex, long,1/*isTryLock*/); + + CALL_FN_W_W(ret, fn, mutex); + + /* There's a hole here: libpthread now knows the lock is locked, + but the tool doesn't, so some other thread could run and detect + that the lock has been acquired by someone (this thread). Does + this matter? Not sure, but I don't think so. */ + + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + pthread_mutex_t*,mutex); + } else { + if (ret != EBUSY) + DO_PthAPIerror( "pthread_mutex_trylock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: mxtrylock -> %d >>\n", ret); + } + return ret; +} + + +// pthread_mutex_timedlock. Identical logic to pthread_mutex_trylock. +PTH_FUNC(int, pthreadZumutexZutimedlock, // pthread_mutex_timedlock + pthread_mutex_t *mutex, + void* timeout) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_mxtimedlock %p %p", mutex, timeout); + fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, + pthread_mutex_t*,mutex, long,1/*isTryLock-ish*/); + + CALL_FN_W_WW(ret, fn, mutex,timeout); + + /* There's a hole here: libpthread now knows the lock is locked, + but the tool doesn't, so some other thread could run and detect + that the lock has been acquired by someone (this thread). Does + this matter? Not sure, but I don't think so. */ + + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + pthread_mutex_t*,mutex); + } else { + if (ret != ETIMEDOUT) + DO_PthAPIerror( "pthread_mutex_timedlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: mxtimedlock -> %d >>\n", ret); + } + return ret; +} + + +// pthread_mutex_unlock +PTH_FUNC(int, pthreadZumutexZuunlock, // pthread_mutex_unlock + pthread_mutex_t *mutex) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_mxunlk %p", mutex); fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, + pthread_mutex_t*,mutex); + + CALL_FN_W_W(ret, fn, mutex); + + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_POST, + pthread_mutex_t*,mutex); + } else { + DO_PthAPIerror( "pthread_mutex_unlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " mxunlk -> %d >>\n", ret); + } + return ret; +} + + +/*----------------------------------------------------------------*/ +/*--- pthread_cond_t functions ---*/ +/*----------------------------------------------------------------*/ + +/* Handled: pthread_cond_wait pthread_cond_timedwait + pthread_cond_signal pthread_cond_broadcast + pthread_cond_destroy + + Unhandled: pthread_cond_init + -- is this important? +*/ + +// pthread_cond_wait +PTH_FUNC(int, pthreadZucondZuwaitZAZa, // pthread_cond_wait@* + pthread_cond_t* cond, pthread_mutex_t* mutex) +{ + int ret; + OrigFn fn; + unsigned long mutex_is_valid; + + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_cond_wait %p %p", cond, mutex); + fflush(stderr); + } + + /* Tell the tool a cond-wait is about to happen, so it can check + for bogus argument values. In return it tells us whether it + thinks the mutex is valid or not. 
*/ + DO_CREQ_W_WW(mutex_is_valid, + _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE, + pthread_cond_t*,cond, pthread_mutex_t*,mutex); + assert(mutex_is_valid == 1 || mutex_is_valid == 0); + + /* Tell the tool we're about to drop the mutex. This reflects the + fact that in a cond_wait, we show up holding the mutex, and the + call atomically drops the mutex and waits for the cv to be + signalled. */ + if (mutex_is_valid) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, + pthread_mutex_t*,mutex); + } + + CALL_FN_W_WW(ret, fn, cond,mutex); + + /* these conditionals look stupid, but compare w/ same logic for + pthread_cond_timedwait below */ + if (ret == 0 && mutex_is_valid) { + /* and now we have the mutex again */ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + pthread_mutex_t*,mutex); + } + + if (ret == 0 && mutex_is_valid) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_COND_WAIT_POST, + pthread_cond_t*,cond, pthread_mutex_t*,mutex); + } + + if (ret != 0) { + DO_PthAPIerror( "pthread_cond_wait", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " cowait -> %d >>\n", ret); + } + + return ret; +} + + +// pthread_cond_timedwait +PTH_FUNC(int, pthreadZucondZutimedwaitZAZa, // pthread_cond_timedwait@* + pthread_cond_t* cond, pthread_mutex_t* mutex, + struct timespec* abstime) +{ + int ret; + OrigFn fn; + unsigned long mutex_is_valid; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_cond_timedwait %p %p %p", + cond, mutex, abstime); + fflush(stderr); + } + + /* Tell the tool a cond-wait is about to happen, so it can check + for bogus argument values. In return it tells us whether it + thinks the mutex is valid or not. */ + DO_CREQ_W_WW(mutex_is_valid, + _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE, + pthread_cond_t*,cond, pthread_mutex_t*,mutex); + assert(mutex_is_valid == 1 || mutex_is_valid == 0); + + /* Tell the tool we're about to drop the mutex. This reflects the + fact that in a cond_wait, we show up holding the mutex, and the + call atomically drops the mutex and waits for the cv to be + signalled. */ + if (mutex_is_valid) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, + pthread_mutex_t*,mutex); + } + + CALL_FN_W_WWW(ret, fn, cond,mutex,abstime); + + if ((ret == 0 || ret == ETIMEDOUT) && mutex_is_valid) { + /* and now we have the mutex again */ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + pthread_mutex_t*,mutex); + } + + if (ret == 0 && mutex_is_valid) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_COND_WAIT_POST, + pthread_cond_t*,cond, pthread_mutex_t*,mutex); + } + + if (ret != 0 && ret != ETIMEDOUT) { + DO_PthAPIerror( "pthread_cond_timedwait", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " cotimedwait -> %d >>\n", ret); + } + + return ret; +} + + +// pthread_cond_signal +PTH_FUNC(int, pthreadZucondZusignalZAZa, // pthread_cond_signal@* + pthread_cond_t* cond) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_cond_signal %p", cond); + fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_COND_SIGNAL_PRE, + pthread_cond_t*,cond); + + CALL_FN_W_W(ret, fn, cond); + + if (ret != 0) { + DO_PthAPIerror( "pthread_cond_signal", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " cosig -> %d >>\n", ret); + } + + return ret; +} + + +// pthread_cond_broadcast +// Note, this is pretty much identical, from a dependency-graph +// point of view, with cond_signal, so the code is duplicated. +// Maybe it should be commoned up. 
+PTH_FUNC(int, pthreadZucondZubroadcastZAZa, // pthread_cond_broadcast@* + pthread_cond_t* cond) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_broadcast_signal %p", cond); + fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_COND_BROADCAST_PRE, + pthread_cond_t*,cond); + + CALL_FN_W_W(ret, fn, cond); + + if (ret != 0) { + DO_PthAPIerror( "pthread_cond_broadcast", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " cobro -> %d >>\n", ret); + } + + return ret; +} + + +// pthread_cond_destroy +PTH_FUNC(int, pthreadZucondZudestroyZAZa, // pthread_cond_destroy@* + pthread_cond_t* cond) +{ + int ret; + OrigFn fn; + + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_cond_destroy %p", cond); + fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE, + pthread_cond_t*,cond); + + CALL_FN_W_W(ret, fn, cond); + + if (ret != 0) { + DO_PthAPIerror( "pthread_cond_destroy", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " codestr -> %d >>\n", ret); + } + + return ret; +} + + +/*----------------------------------------------------------------*/ +/*--- pthread_barrier_t functions ---*/ +/*----------------------------------------------------------------*/ + +/* Handled: pthread_barrier_init + pthread_barrier_wait + pthread_barrier_destroy + + Unhandled: pthread_barrierattr_destroy + pthread_barrierattr_getpshared + pthread_barrierattr_init + pthread_barrierattr_setpshared + -- are these important? +*/ + +PTH_FUNC(int, pthreadZubarrierZuinit, // pthread_barrier_init + pthread_barrier_t* bar, + pthread_barrierattr_t* attr, unsigned long count) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_barrier_init %p %p %lu", + bar, attr, count); + fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_BARRIER_INIT_PRE, + pthread_barrier_t*,bar, + unsigned long,count); + + CALL_FN_W_WWW(ret, fn, bar,attr,count); + + if (ret != 0) { + DO_PthAPIerror( "pthread_barrier_init", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " pthread_barrier_init -> %d >>\n", ret); + } + + return ret; +} + + +PTH_FUNC(int, pthreadZubarrierZuwait, // pthread_barrier_wait + pthread_barrier_t* bar) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_barrier_wait %p", bar); + fflush(stderr); + } + + /* That this works correctly, and doesn't screw up when a thread + leaving the barrier races round to the front and re-enters while + other threads are still leaving it, is quite subtle. See + comments in the handler for PTHREAD_BARRIER_WAIT_PRE in + hg_main.c. 
*/ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_BARRIER_WAIT_PRE, + pthread_barrier_t*,bar); + + CALL_FN_W_W(ret, fn, bar); + + if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) { + DO_PthAPIerror( "pthread_barrier_wait", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " pthread_barrier_wait -> %d >>\n", ret); + } + + return ret; +} + + +PTH_FUNC(int, pthreadZubarrierZudestroy, // pthread_barrier_destroy + pthread_barrier_t* bar) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_barrier_destroy %p", bar); + fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_BARRIER_DESTROY_PRE, + pthread_barrier_t*,bar); + + CALL_FN_W_W(ret, fn, bar); + + if (ret != 0) { + DO_PthAPIerror( "pthread_barrier_destroy", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " pthread_barrier_destroy -> %d >>\n", ret); + } + + return ret; +} + +/*----------------------------------------------------------------*/ +/*--- pthread_rwlock_t functions ---*/ +/*----------------------------------------------------------------*/ + +/* Handled: pthread_rwlock_init pthread_rwlock_destroy + pthread_rwlock_rdlock + pthread_rwlock_wrlock + pthread_rwlock_unlock + + Unhandled: pthread_rwlock_timedrdlock + pthread_rwlock_tryrdlock + + pthread_rwlock_timedwrlock + pthread_rwlock_trywrlock +*/ + +// pthread_rwlock_init +PTH_FUNC(int, pthreadZurwlockZuinit, // pthread_rwlock_init + pthread_rwlock_t *rwl, + pthread_rwlockattr_t* attr) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_init %p", rwl); fflush(stderr); + } + + CALL_FN_W_WW(ret, fn, rwl,attr); + + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_INIT_POST, + pthread_rwlock_t*,rwl); + } else { + DO_PthAPIerror( "pthread_rwlock_init", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_init -> %d >>\n", ret); + } + return ret; +} + + +// pthread_rwlock_destroy +PTH_FUNC(int, pthreadZurwlockZudestroy, // pthread_rwlock_destroy + pthread_rwlock_t *rwl) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_destroy %p", rwl); fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_DESTROY_PRE, + pthread_rwlock_t*,rwl); + + CALL_FN_W_W(ret, fn, rwl); + + if (ret != 0) { + DO_PthAPIerror( "pthread_rwlock_destroy", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_destroy -> %d >>\n", ret); + } + return ret; +} + + +// pthread_rwlock_wrlock +PTH_FUNC(int, pthreadZurwlockZuwrlock, // pthread_rwlock_wrlock + pthread_rwlock_t* rwlock) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_wlk %p", rwlock); fflush(stderr); + } + + DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, + pthread_rwlock_t*,rwlock, + long,1/*isW*/, long,0/*!isTryLock*/); + + CALL_FN_W_W(ret, fn, rwlock); + + if (ret == 0 /*success*/) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, + pthread_rwlock_t*,rwlock, long,1/*isW*/); + } else { + DO_PthAPIerror( "pthread_rwlock_wrlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_wlk -> %d >>\n", ret); + } + return ret; +} + + +// pthread_rwlock_rdlock +PTH_FUNC(int, pthreadZurwlockZurdlock, // pthread_rwlock_rdlock + pthread_rwlock_t* rwlock) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_rlk %p", rwlock); fflush(stderr); + } + + 
DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, + pthread_rwlock_t*,rwlock, + long,0/*!isW*/, long,0/*!isTryLock*/); + + CALL_FN_W_W(ret, fn, rwlock); + + if (ret == 0 /*success*/) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, + pthread_rwlock_t*,rwlock, long,0/*!isW*/); + } else { + DO_PthAPIerror( "pthread_rwlock_rdlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_rlk -> %d >>\n", ret); + } + return ret; +} + + +// pthread_rwlock_trywrlock +PTH_FUNC(int, pthreadZurwlockZutrywrlock, // pthread_rwlock_trywrlock + pthread_rwlock_t* rwlock) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_trywlk %p", rwlock); fflush(stderr); + } + + DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, + pthread_rwlock_t*,rwlock, + long,1/*isW*/, long,1/*isTryLock*/); + + CALL_FN_W_W(ret, fn, rwlock); + + /* There's a hole here: libpthread now knows the lock is locked, + but the tool doesn't, so some other thread could run and detect + that the lock has been acquired by someone (this thread). Does + this matter? Not sure, but I don't think so. */ + + if (ret == 0 /*success*/) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, + pthread_rwlock_t*,rwlock, long,1/*isW*/); + } else { + if (ret != EBUSY) + DO_PthAPIerror( "pthread_rwlock_trywrlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_trywlk -> %d >>\n", ret); + } + return ret; +} + + +// pthread_rwlock_tryrdlock +PTH_FUNC(int, pthreadZurwlockZutryrdlock, // pthread_rwlock_tryrdlock + pthread_rwlock_t* rwlock) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_tryrlk %p", rwlock); fflush(stderr); + } + + DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, + pthread_rwlock_t*,rwlock, + long,0/*!isW*/, long,1/*isTryLock*/); + + CALL_FN_W_W(ret, fn, rwlock); + + /* There's a hole here: libpthread now knows the lock is locked, + but the tool doesn't, so some other thread could run and detect + that the lock has been acquired by someone (this thread). Does + this matter? Not sure, but I don't think so. 
*/ + + if (ret == 0 /*success*/) { + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, + pthread_rwlock_t*,rwlock, long,0/*!isW*/); + } else { + if (ret != EBUSY) + DO_PthAPIerror( "pthread_rwlock_tryrdlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_tryrlk -> %d >>\n", ret); + } + return ret; +} + + +// pthread_rwlock_unlock +PTH_FUNC(int, pthreadZurwlockZuunlock, // pthread_rwlock_unlock + pthread_rwlock_t* rwlock) +{ + int ret; + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_PTH_FNS) { + fprintf(stderr, "<< pthread_rwl_unlk %p", rwlock); fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_PRE, + pthread_rwlock_t*,rwlock); + + CALL_FN_W_W(ret, fn, rwlock); + + if (ret == 0 /*success*/) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_POST, + pthread_rwlock_t*,rwlock); + } else { + DO_PthAPIerror( "pthread_rwlock_unlock", ret ); + } + + if (TRACE_PTH_FNS) { + fprintf(stderr, " :: rwl_unlk -> %d >>\n", ret); + } + return ret; +} + + +/*----------------------------------------------------------------*/ +/*--- POSIX semaphores ---*/ +/*----------------------------------------------------------------*/ + +#include <semaphore.h> + +#define TRACE_SEM_FNS 0 + +/* Handled: + int sem_init(sem_t *sem, int pshared, unsigned value); + int sem_destroy(sem_t *sem); + int sem_wait(sem_t *sem); + int sem_post(sem_t *sem); + + Unhandled: + int sem_trywait(sem_t *sem); + int sem_timedwait(sem_t *restrict sem, + const struct timespec *restrict abs_timeout); +*/ + +/* glibc-2.5 has sem_init@@GLIBC_2.2.5 (amd64-linux) + and sem_init@@GLIBC_2.1 (x86-linux): match sem_init@* */ +PTH_FUNC(int, semZuinitZAZa, sem_t* sem, int pshared, unsigned long value) +{ + OrigFn fn; + int ret; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_SEM_FNS) { + fprintf(stderr, "<< sem_init(%p,%d,%lu) ", sem,pshared,value); + fflush(stderr); + } + + CALL_FN_W_WWW(ret, fn, sem,pshared,value); + + if (ret == 0) { + DO_CREQ_v_WW(_VG_USERREQ__HG_POSIX_SEM_INIT_POST, + sem_t*, sem, unsigned long, value); + } else { + DO_PthAPIerror( "sem_init", errno ); + } + + if (TRACE_SEM_FNS) { + fprintf(stderr, " sem_init -> %d >>\n", ret); + fflush(stderr); + } + + return ret; +} + + +/* glibc-2.5 has sem_destroy@@GLIBC_2.2.5 (amd64-linux) + and sem_destroy@@GLIBC_2.1 (x86-linux); match sem_destroy@* */ +PTH_FUNC(int, semZudestroyZAZa, sem_t* sem) +{ + OrigFn fn; + int ret; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_SEM_FNS) { + fprintf(stderr, "<< sem_destroy(%p) ", sem); + fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_POSIX_SEM_DESTROY_PRE, sem_t*, sem); + + CALL_FN_W_W(ret, fn, sem); + + if (ret != 0) { + DO_PthAPIerror( "sem_destroy", errno ); + } + + if (TRACE_SEM_FNS) { + fprintf(stderr, " sem_destroy -> %d >>\n", ret); + fflush(stderr); + } + + return ret; +} + + +/* glibc-2.5 has sem_wait (amd64-linux); match sem_wait + and sem_wait@@GLIBC_2.1 (x86-linux); match sem_wait@* */ +/* wait: decrement semaphore - acquire lockage */ +static int sem_wait_WRK(sem_t* sem) +{ + OrigFn fn; + int ret; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_SEM_FNS) { + fprintf(stderr, "<< sem_wait(%p) ", sem); + fflush(stderr); + } + + CALL_FN_W_W(ret, fn, sem); + + if (ret == 0) { + DO_CREQ_v_W(_VG_USERREQ__HG_POSIX_SEM_WAIT_POST, sem_t*,sem); + } else { + DO_PthAPIerror( "sem_wait", errno ); + } + + if (TRACE_SEM_FNS) { + fprintf(stderr, " sem_wait -> %d >>\n", ret); + fflush(stderr); + } + + return ret; +} +PTH_FUNC(int, semZuwait, sem_t* sem) { /* sem_wait */ + return sem_wait_WRK(sem); +} +PTH_FUNC(int, 
semZuwaitZAZa, sem_t* sem) { /* sem_wait@* */ + return sem_wait_WRK(sem); +} + + +/* glibc-2.5 has sem_post (amd64-linux); match sem_post + and sem_post@@GLIBC_2.1 (x86-linux); match sem_post@* */ +/* post: increment semaphore - release lockage */ +static int sem_post_WRK(sem_t* sem) +{ + OrigFn fn; + int ret; + + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_SEM_FNS) { + fprintf(stderr, "<< sem_post(%p) ", sem); + fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_POSIX_SEM_POST_PRE, sem_t*,sem); + + CALL_FN_W_W(ret, fn, sem); + + if (ret != 0) { + DO_PthAPIerror( "sem_post", errno ); + } + + if (TRACE_SEM_FNS) { + fprintf(stderr, " sem_post -> %d >>\n", ret); + fflush(stderr); + } + + return ret; +} +PTH_FUNC(int, semZupost, sem_t* sem) { /* sem_post */ + return sem_post_WRK(sem); +} +PTH_FUNC(int, semZupostZAZa, sem_t* sem) { /* sem_post@* */ + return sem_post_WRK(sem); +} + + + +/*----------------------------------------------------------------*/ +/*--- Qt 4 threading functions (w/ GNU name mangling) ---*/ +/*----------------------------------------------------------------*/ + +/* Handled: + QMutex::lock() + QMutex::unlock() + QMutex::tryLock() + QMutex::tryLock(int) + + QMutex::QMutex(QMutex::RecursionMode) _ZN6QMutexC1ENS_13RecursionModeE + QMutex::QMutex(QMutex::RecursionMode) _ZN6QMutexC2ENS_13RecursionModeE + QMutex::~QMutex() _ZN6QMutexD1Ev + QMutex::~QMutex() _ZN6QMutexD2Ev + + Unhandled: + QReadWriteLock::lockForRead() + QReadWriteLock::lockForWrite() + QReadWriteLock::unlock() + QReadWriteLock::tryLockForRead(int) + QReadWriteLock::tryLockForRead() + QReadWriteLock::tryLockForWrite(int) + QReadWriteLock::tryLockForWrite() + + QWaitCondition::wait(QMutex*, unsigned long) + QWaitCondition::wakeAll() + QWaitCondition::wakeOne() + + QSemaphore::* +*/ +/* More comments, 19 Nov 08, based on assessment of qt-4.5.0TP1, + at least on Unix: + + It's apparently only necessary to intercept QMutex, since that is + not implemented using pthread_mutex_t; instead Qt4 has its own + implementation based on atomics (to check the non-contended case) + and pthread_cond_wait (to wait in the contended case). + + QReadWriteLock is built on top of QMutex, counters, and a wait + queue. So we don't need to handle it specially once QMutex + handling is correct -- presumably the dependencies through QMutex + are sufficient to avoid any false race reports. On the other hand, + it is an open question whether too many dependencies are observed + -- in which case we may miss races (false negatives). I suspect + this is likely to be the case, unfortunately. + + QWaitCondition is built on pthread_cond_t, pthread_mutex_t, QMutex + and QReadWriteLock. Same compositional-correctness justification + and limitations as for QReadWriteLock. + + Ditto QSemaphore (from cursory examination). + + Does it matter that only QMutex is handled directly? Open + question. From testing with drd/tests/qt4_* and with KDE4 apps, it + appears that no false errors are reported; however it is not clear + if this is causing false negatives. + + Another problem with Qt4 is thread exiting. Threads are created + with pthread_create (fine); but they detach and simply exit when + done. There is no use of pthread_join, and the provided + wait-for-a-thread-to-exit mechanism (QThread::wait, I believe) + relies on a system of mutexes and flags. I suspect this also + causes too many dependencies to appear. 
Consequently H sometimes + fails to detect races at exit in some very short-lived racy + programs, because it appears that a thread can exit _and_ have an + observed dependency edge back to the main thread (presumably) + before the main thread reaps the child (that is, calls + QThread::wait). + + This theory is supported by the observation that if all threads are + made to wait at a pthread_barrier_t immediately before they exit, + then H's detection of races in such programs becomes reliable; + without the barrier, it varies from run to run, depending + (according to investigation) on whether aforementioned + exit-before-reaping behaviour happens or not. + + Finally, why is it necessary to intercept the QMutex constructors + and destructors? The constructors are intercepted only as a matter + of convenience, so H can print accurate "first observed at" + clauses. However, it is actually necessary to intercept the + destructors (as it is with pthread_mutex_destroy) in order that + locks get removed from LAOG when they are destroyed. +*/ + +// soname is libQtCore.so.4 ; match against libQtCore.so* +#define QT4_FUNC(ret_ty, f, args...) \ + ret_ty I_WRAP_SONAME_FNNAME_ZU(libQtCoreZdsoZa,f)(args); \ + ret_ty I_WRAP_SONAME_FNNAME_ZU(libQtCoreZdsoZa,f)(args) + +// QMutex::lock() +QT4_FUNC(void, _ZN6QMutex4lockEv, void* self) +{ + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_QT4_FNS) { + fprintf(stderr, "<< QMutex::lock %p", self); fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, + void*,self, long,0/*!isTryLock*/); + + CALL_FN_v_W(fn, self); + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + void*, self); + + if (TRACE_QT4_FNS) { + fprintf(stderr, " :: Q::lock done >>\n"); + } +} + +// QMutex::unlock() +QT4_FUNC(void, _ZN6QMutex6unlockEv, void* self) +{ + OrigFn fn; + VALGRIND_GET_ORIG_FN(fn); + + if (TRACE_QT4_FNS) { + fprintf(stderr, "<< QMutex::unlock %p", self); fflush(stderr); + } + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, + void*, self); + + CALL_FN_v_W(fn, self); + + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_POST, + void*, self); + + if (TRACE_QT4_FNS) { + fprintf(stderr, " Q::unlock done >>\n"); + } +} + +// bool QMutex::tryLock() +// using 'long' to mimic C++ 'bool' +QT4_FUNC(long, _ZN6QMutex7tryLockEv, void* self) +{ + OrigFn fn; + long ret; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_QT4_FNS) { + fprintf(stderr, "<< QMutex::tryLock %p", self); fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, + void*,self, long,1/*isTryLock*/); + + CALL_FN_W_W(ret, fn, self); + + // assumes that only the low 8 bits of the 'bool' are significant + if (ret & 0xFF) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + void*, self); + } + + if (TRACE_QT4_FNS) { + fprintf(stderr, " :: Q::tryLock -> %lu >>\n", ret); + } + + return ret; +} + +// bool QMutex::tryLock(int) +// using 'long' to mimic C++ 'bool' +QT4_FUNC(long, _ZN6QMutex7tryLockEi, void* self, long arg2) +{ + OrigFn fn; + long ret; + VALGRIND_GET_ORIG_FN(fn); + if (TRACE_QT4_FNS) { + fprintf(stderr, "<< QMutex::tryLock(int) %p %d", self, (int)arg2); + fflush(stderr); + } + + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, + void*,self, long,1/*isTryLock*/); + + CALL_FN_W_WW(ret, fn, self,arg2); + + // assumes that only the low 8 bits of the 'bool' are significant + if (ret & 0xFF) { + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, + void*, self); + } + + if (TRACE_QT4_FNS) { + fprintf(stderr, " :: Q::tryLock(int) -> %lu >>\n", ret); + } + + 
return ret; +} + + +// It's not really very clear what the args are here. But from +// a bit of dataflow analysis of the generated machine code of +// the original function, it appears this takes two args, and +// returns nothing. Nevertheless preserve return value just in +// case. A bit of debug printing indicates that the first arg +// is that of the mutex and the second is either zero or one, +// probably being the recursion mode, therefore. +// QMutex::QMutex(QMutex::RecursionMode) ("C1ENS" variant) +QT4_FUNC(void*, _ZN6QMutexC1ENS_13RecursionModeE, + void* mutex, + long recmode) +{ + OrigFn fn; + long ret; + VALGRIND_GET_ORIG_FN(fn); + CALL_FN_W_WW(ret, fn, mutex, recmode); + // fprintf(stderr, "QMutex constructor 1: %p <- %p %p\n", ret, arg1, arg2); + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST, + void*,mutex, long,1/*mbRec*/); + return (void*)ret; +} + +// QMutex::~QMutex() ("D1Ev" variant) +QT4_FUNC(void*, _ZN6QMutexD1Ev, void* mutex) +{ + OrigFn fn; + long ret; + VALGRIND_GET_ORIG_FN(fn); + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE, + void*,mutex); + CALL_FN_W_W(ret, fn, mutex); + return (void*)ret; +} + + +// QMutex::QMutex(QMutex::RecursionMode) ("C2ENS" variant) +QT4_FUNC(void*, _ZN6QMutexC2ENS_13RecursionModeE, + void* mutex, + long recmode) +{ + assert(0); +} + +// QMutex::~QMutex() ("D2Ev" variant) +QT4_FUNC(void*, _ZN6QMutexD2Ev, void* mutex) +{ + assert(0); +} + + +// QReadWriteLock is not intercepted directly. See comments +// above. + +//// QReadWriteLock::lockForRead() +//// _ZN14QReadWriteLock11lockForReadEv == QReadWriteLock::lockForRead() +//QT4_FUNC(void, ZuZZN14QReadWriteLock11lockForReadEv, +// // _ZN14QReadWriteLock11lockForReadEv +// void* self) +//{ +// OrigFn fn; +// VALGRIND_GET_ORIG_FN(fn); +// if (TRACE_QT4_FNS) { +// fprintf(stderr, "<< QReadWriteLock::lockForRead %p", self); +// fflush(stderr); +// } +// +// DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, +// void*,self, +// long,0/*!isW*/, long,0/*!isTryLock*/); +// +// CALL_FN_v_W(fn, self); +// +// DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, +// void*,self, long,0/*!isW*/); +// +// if (TRACE_QT4_FNS) { +// fprintf(stderr, " :: Q::lockForRead :: done >>\n"); +// } +//} +// +//// QReadWriteLock::lockForWrite() +//// _ZN14QReadWriteLock12lockForWriteEv == QReadWriteLock::lockForWrite() +//QT4_FUNC(void, ZuZZN14QReadWriteLock12lockForWriteEv, +// // _ZN14QReadWriteLock12lockForWriteEv +// void* self) +//{ +// OrigFn fn; +// VALGRIND_GET_ORIG_FN(fn); +// if (TRACE_QT4_FNS) { +// fprintf(stderr, "<< QReadWriteLock::lockForWrite %p", self); +// fflush(stderr); +// } +// +// DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, +// void*,self, +// long,1/*isW*/, long,0/*!isTryLock*/); +// +// CALL_FN_v_W(fn, self); +// +// DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, +// void*,self, long,1/*isW*/); +// +// if (TRACE_QT4_FNS) { +// fprintf(stderr, " :: Q::lockForWrite :: done >>\n"); +// } +//} +// +//// QReadWriteLock::unlock() +//// _ZN14QReadWriteLock6unlockEv == QReadWriteLock::unlock() +//QT4_FUNC(void, ZuZZN14QReadWriteLock6unlockEv, +// // _ZN14QReadWriteLock6unlockEv +// void* self) +//{ +// OrigFn fn; +// VALGRIND_GET_ORIG_FN(fn); +// if (TRACE_QT4_FNS) { +// fprintf(stderr, "<< QReadWriteLock::unlock %p", self); +// fflush(stderr); +// } +// +// DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_PRE, +// void*,self); +// +// CALL_FN_v_W(fn, self); +// +// DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_POST, +// void*,self); +// +// if 
(TRACE_QT4_FNS) { +// fprintf(stderr, " :: Q::unlock :: done >>\n"); +// } +//} + + +/*----------------------------------------------------------------*/ +/*--- Replacements for basic string functions, that don't ---*/ +/*--- overrun the input arrays. ---*/ +/*----------------------------------------------------------------*/ + +/* Copied verbatim from memcheck/mc_replace_strmem.c. When copying + new functions, please keep them in the same order as they appear in + mc_replace_strmem.c. */ + + +#define STRCHR(soname, fnname) \ + char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ); \ + char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ) \ + { \ + UChar ch = (UChar)((UInt)c); \ + UChar* p = (UChar*)s; \ + while (True) { \ + if (*p == ch) return p; \ + if (*p == 0) return NULL; \ + p++; \ + } \ + } + +// Apparently index() is the same thing as strchr() +STRCHR(VG_Z_LIBC_SONAME, strchr) +STRCHR(VG_Z_LD_LINUX_SO_2, strchr) +STRCHR(VG_Z_LD_LINUX_X86_64_SO_2, strchr) +STRCHR(VG_Z_LIBC_SONAME, index) +STRCHR(VG_Z_LD_LINUX_SO_2, index) +STRCHR(VG_Z_LD_LINUX_X86_64_SO_2, index) + + +// Note that this replacement often doesn't get used because gcc inlines +// calls to strlen() with its own built-in version. This can be very +// confusing if you aren't expecting it. Other small functions in this file +// may also be inline by gcc. +#define STRLEN(soname, fnname) \ + SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ); \ + SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ) \ + { \ + SizeT i = 0; \ + while (str[i] != 0) i++; \ + return i; \ + } + +STRLEN(VG_Z_LIBC_SONAME, strlen) +STRLEN(VG_Z_LD_LINUX_SO_2, strlen) +STRLEN(VG_Z_LD_LINUX_X86_64_SO_2, strlen) + + +#define STRCPY(soname, fnname) \ + char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ); \ + char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ) \ + { \ + const Char* dst_orig = dst; \ + \ + while (*src) *dst++ = *src++; \ + *dst = 0; \ + \ + return (char*)dst_orig; \ + } + +STRCPY(VG_Z_LIBC_SONAME, strcpy) + + +#define STRCMP(soname, fnname) \ + int VG_REPLACE_FUNCTION_ZU(soname,fnname) \ + ( const char* s1, const char* s2 ); \ + int VG_REPLACE_FUNCTION_ZU(soname,fnname) \ + ( const char* s1, const char* s2 ) \ + { \ + register unsigned char c1; \ + register unsigned char c2; \ + while (True) { \ + c1 = *(unsigned char *)s1; \ + c2 = *(unsigned char *)s2; \ + if (c1 != c2) break; \ + if (c1 == 0) break; \ + s1++; s2++; \ + } \ + if ((unsigned char)c1 < (unsigned char)c2) return -1; \ + if ((unsigned char)c1 > (unsigned char)c2) return 1; \ + return 0; \ + } + +STRCMP(VG_Z_LIBC_SONAME, strcmp) +STRCMP(VG_Z_LD_LINUX_X86_64_SO_2, strcmp) +STRCMP(VG_Z_LD64_SO_1, strcmp) + + +#define MEMCPY(soname, fnname) \ + void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \ + ( void *dst, const void *src, SizeT len ); \ + void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \ + ( void *dst, const void *src, SizeT len ) \ + { \ + register char *d; \ + register char *s; \ + \ + if (len == 0) \ + return dst; \ + \ + if ( dst > src ) { \ + d = (char *)dst + len - 1; \ + s = (char *)src + len - 1; \ + while ( len >= 4 ) { \ + *d-- = *s--; \ + *d-- = *s--; \ + *d-- = *s--; \ + *d-- = *s--; \ + len -= 4; \ + } \ + while ( len-- ) { \ + *d-- = *s--; \ + } \ + } else if ( dst < src ) { \ + d = (char *)dst; \ + s = (char *)src; \ + while ( len >= 4 ) { \ + *d++ = *s++; \ + *d++ = *s++; \ + *d++ = *s++; \ + *d++ = *s++; \ + len -= 4; \ + } \ + while ( len-- ) { \ + *d++ = *s++; \ + } \ 
+ } \ + return dst; \ + } + +MEMCPY(VG_Z_LIBC_SONAME, memcpy) +MEMCPY(VG_Z_LD_SO_1, memcpy) /* ld.so.1 */ +MEMCPY(VG_Z_LD64_SO_1, memcpy) /* ld64.so.1 */ +/* icc9 blats these around all over the place. Not only in the main + executable but various .so's. They are highly tuned and read + memory beyond the source boundary (although work correctly and + never go across page boundaries), so give errors when run natively, + at least for misaligned source arg. Just intercepting in the exe + only until we understand more about the problem. See + http://bugs.kde.org/show_bug.cgi?id=139776 + */ +MEMCPY(NONE, _intel_fast_memcpy) + + +/*--------------------------------------------------------------------*/ +/*--- end tc_intercepts.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_lock_n_thread.c.svn-base b/helgrind/.svn/text-base/hg_lock_n_thread.c.svn-base new file mode 100644 index 0000000..bcdb236 --- /dev/null +++ b/helgrind/.svn/text-base/hg_lock_n_thread.c.svn-base @@ -0,0 +1,123 @@ + +/*--------------------------------------------------------------------*/ +/*--- Definitions for Locks and Threads. ---*/ +/*--- hg_lock_n_thread.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "pub_tool_basics.h" +#include "pub_tool_libcbase.h" +#include "pub_tool_libcassert.h" +#include "pub_tool_execontext.h" +#include "pub_tool_threadstate.h" +#include "pub_tool_wordfm.h" + +#include "hg_basics.h" +#include "hg_wordset.h" +#include "hg_lock_n_thread.h" /* self */ + + +/*----------------------------------------------------------------*/ +/*--- Sanity checking ---*/ +/*----------------------------------------------------------------*/ + +inline Bool HG_(is_sane_Thread) ( Thread* thr ) { + return thr != NULL && thr->magic == Thread_MAGIC; +} + +static Bool is_sane_Bag_of_Threads ( WordBag* bag ) +{ + Thread* thr; + Word count; + VG_(initIterBag)( bag ); + while (VG_(nextIterBag)( bag, (Word*)&thr, &count )) { + if (count < 1) return False; + if (!HG_(is_sane_Thread)(thr)) return False; + } + VG_(doneIterBag)( bag ); + return True; +} + +static Bool is_sane_Lock_BASE ( Lock* lock ) +{ + if (lock == NULL + || (lock->magic != LockN_MAGIC && lock->magic != LockP_MAGIC)) + return False; + switch (lock->kind) { + case LK_mbRec: case LK_nonRec: case LK_rdwr: break; + default: return False; + } + if (lock->heldBy == NULL) { + if (lock->acquired_at != NULL) return False; + /* Unheld. We arbitrarily require heldW to be False. 
*/ + return !lock->heldW; + } else { + if (lock->acquired_at == NULL) return False; + } + + /* If heldBy is non-NULL, we require it to contain at least one + thread. */ + if (VG_(isEmptyBag)(lock->heldBy)) + return False; + + /* Lock is either r- or w-held. */ + if (!is_sane_Bag_of_Threads(lock->heldBy)) + return False; + if (lock->heldW) { + /* Held in write-mode */ + if ((lock->kind == LK_nonRec || lock->kind == LK_rdwr) + && !VG_(isSingletonTotalBag)(lock->heldBy)) + return False; + } else { + /* Held in read-mode */ + if (lock->kind != LK_rdwr) return False; + } + return True; +} + +Bool HG_(is_sane_LockP) ( Lock* lock ) { + return lock != NULL + && lock->magic == LockP_MAGIC + && lock->hbso == NULL + && is_sane_Lock_BASE(lock); +} + +Bool HG_(is_sane_LockN) ( Lock* lock ) { + return lock != NULL + && lock->magic == LockN_MAGIC + && lock->hbso != NULL + && is_sane_Lock_BASE(lock); +} + +Bool HG_(is_sane_LockNorP) ( Lock* lock ) { + return is_sane_Lock_BASE(lock); +} + + +/*--------------------------------------------------------------------*/ +/*--- end hg_lock_n_thread.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_lock_n_thread.h.svn-base b/helgrind/.svn/text-base/hg_lock_n_thread.h.svn-base new file mode 100644 index 0000000..01ae3aa --- /dev/null +++ b/helgrind/.svn/text-base/hg_lock_n_thread.h.svn-base @@ -0,0 +1,165 @@ + +/*--------------------------------------------------------------------*/ +/*--- Definitions for Locks and Threads. ---*/ +/*--- hg_lock_n_thread.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __HG_LOCK_N_THREAD_H +#define __HG_LOCK_N_THREAD_H + + +/*----------------------------------------------------------------*/ +/*--- Primary data definitions ---*/ +/*----------------------------------------------------------------*/ + +/* Magic numbers, for doing assertions that structures really are of + the right type. Useful as some of the code can get a bit + complex. */ +#define Thread_MAGIC 0x504fc5e5 +#define LockN_MAGIC 0x6545b557 /* normal nonpersistent locks */ +#define LockP_MAGIC 0x755b5456 /* persistent (copied) locks */ + + +/* These are handles for Word sets. CONSTRAINTS: must be (very) small + ints numbered from zero, since < 30-bit versions of them are used to + encode thread-sets and lock-sets in 32-bit shadow words. */ +typedef WordSet WordSetID; + + +/* Synchronisation Objects, exported abstractly by libhb. 
*/ +typedef struct _SO SO; + +/* Thr, libhb's private thread record, exported abstractly */ +typedef struct _Thr Thr; + + +/* Stores information about a thread. Addresses of these also serve + as unique thread identifiers and so are never freed, so they should + be as small as possible. Freeing Thread structures makes the + storage management just too complex, and most programs don't create + many threads, so tolerating this leak seems like a not-bad + tradeoff. + + Since these are never freed, the .coretid field only indicates the + core's ThreadId associated with this Thread whilst it is alive. + Once the thread finishes, the ThreadId is set to + VG_INVALID_THREADID. + + The core may later re-use the same ThreadId for what is a logically + completely different thread, which of course must have a different + Thread structure. */ +typedef + struct _Thread { + /* ADMIN */ + struct _Thread* admin; + UInt magic; + Thr* hbthr; + ThreadId coretid; + /* USEFUL */ + WordSetID locksetA; /* WordSet of Lock* currently held by thread */ + WordSetID locksetW; /* subset of locksetA held in w-mode */ + /* EXPOSITION */ + /* Place where parent was when this thread was created. */ + ExeContext* created_at; + Bool announced; + /* Index for generating references in error messages. */ + Int errmsg_index; + } + Thread; + + +/* Stores information about a lock's current state. These are + allocated and later freed (when the containing memory becomes + NoAccess). This gives a problem for the XError type, which + contains Lock*s. Solution is to copy any Lock which is to be + incorporated into an XErrors, so as to make it independent from the + 'normal' collection of Locks, which can come and go. When the lock + is copied, its .magic is changed from LockN_Magic to + LockP_Magic. */ + +/* Lock kinds. */ +typedef + enum { + LK_mbRec=1001, /* normal mutex, possibly recursive */ + LK_nonRec, /* normal mutex, definitely non recursive */ + LK_rdwr /* reader-writer lock */ + } + LockKind; + +typedef + struct _Lock { + /* ADMIN */ + struct _Lock* admin; + ULong unique; /* used for persistence-hashing */ + UInt magic; /* LockN_MAGIC or LockP_MAGIC */ + /* EXPOSITION */ + /* Place where lock first came to the attention of Helgrind. */ + ExeContext* appeared_at; + /* If the lock is held, place where the lock most recently made + an unlocked->locked transition. Must be sync'd with .heldBy: + either both NULL or both non-NULL. */ + ExeContext* acquired_at; + /* USEFUL-STATIC */ + SO* hbso; /* associated SO */ + Addr guestaddr; /* Guest address of lock */ + LockKind kind; /* what kind of lock this is */ + /* USEFUL-DYNAMIC */ + Bool heldW; + WordBag* heldBy; /* bag of threads that hold this lock */ + /* .heldBy is NULL: lock is unheld, and .heldW is meaningless + but arbitrarily set to False + .heldBy is non-NULL: + .heldW is True: lock is w-held by threads in heldBy + .heldW is False: lock is r-held by threads in heldBy + Either way, heldBy may not validly be an empty Bag. 
+ + for LK_nonRec, r-holdings are not allowed, and w-holdings may + only have sizeTotal(heldBy) == 1 + + for LK_mbRec, r-holdings are not allowed, and w-holdings may + only have sizeUnique(heldBy) == 1 + + for LK_rdwr, w-holdings may only have sizeTotal(heldBy) == 1 */ + } + Lock; + +/*----------------------------------------------------------------*/ +/*--- Sanity checking ---*/ +/*----------------------------------------------------------------*/ + +Bool HG_(is_sane_Thread) ( Thread* thr ); +Bool HG_(is_sane_LockP) ( Lock* lock ); +Bool HG_(is_sane_LockN) ( Lock* lock ); +Bool HG_(is_sane_LockNorP) ( Lock* lock ); + + +#endif /* ! __HG_LOCK_N_THREAD_H */ + +/*--------------------------------------------------------------------*/ +/*--- end hg_lock_n_thread.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_main.c.svn-base b/helgrind/.svn/text-base/hg_main.c.svn-base new file mode 100644 index 0000000..d63d73b --- /dev/null +++ b/helgrind/.svn/text-base/hg_main.c.svn-base @@ -0,0 +1,4287 @@ + +/*--------------------------------------------------------------------*/ +/*--- Helgrind: a Valgrind tool for detecting errors ---*/ +/*--- in threaded programs. hg_main.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#include "pub_tool_basics.h" +#include "pub_tool_libcassert.h" +#include "pub_tool_libcbase.h" +#include "pub_tool_libcprint.h" +#include "pub_tool_threadstate.h" +#include "pub_tool_tooliface.h" +#include "pub_tool_hashtable.h" +#include "pub_tool_replacemalloc.h" +#include "pub_tool_machine.h" +#include "pub_tool_options.h" +#include "pub_tool_xarray.h" +#include "pub_tool_stacktrace.h" +#include "pub_tool_debuginfo.h" /* VG_(get_data_description) */ +#include "pub_tool_wordfm.h" + +#include "hg_basics.h" +#include "hg_wordset.h" +#include "hg_lock_n_thread.h" +#include "hg_errors.h" + +#include "libhb.h" + +#include "helgrind.h" + + +// FIXME: new_mem_w_tid ignores the supplied tid. (wtf?!) 
+ +// FIXME: when client destroys a lock or a CV, remove these +// from our mappings, so that the associated SO can be freed up + +/*----------------------------------------------------------------*/ +/*--- ---*/ +/*----------------------------------------------------------------*/ + +/* Note this needs to be compiled with -fno-strict-aliasing, since it + contains a whole bunch of calls to lookupFM etc which cast between + Word and pointer types. gcc rightly complains this breaks ANSI C + strict aliasing rules, at -O2. No complaints at -O, but -O2 gives + worthwhile performance benefits over -O. +*/ + +// FIXME catch sync signals (SEGV, basically) and unlock BHL, +// if held. Otherwise a LOCK-prefixed insn which segfaults +// gets Helgrind into a total muddle as the BHL will not be +// released after the insn. + +// FIXME what is supposed to happen to locks in memory which +// is relocated as a result of client realloc? + +// FIXME put referencing ThreadId into Thread and get +// rid of the slow reverse mapping function. + +// FIXME accesses to NoAccess areas: change state to Excl? + +// FIXME report errors for accesses of NoAccess memory? + +// FIXME pth_cond_wait/timedwait wrappers. Even if these fail, +// the thread still holds the lock. + +/* ------------ Debug/trace options ------------ */ + +// this is: +// shadow_mem_make_NoAccess: 29156 SMs, 1728 scanned +// happens_before_wrk: 1000 +// ev__post_thread_join: 3360 SMs, 29 scanned, 252 re-Excls +#define SHOW_EXPENSIVE_STUFF 0 + +// 0 for silent, 1 for some stuff, 2 for lots of stuff +#define SHOW_EVENTS 0 + + +static void all__sanity_check ( Char* who ); /* fwds */ + +#define HG_CLI__MALLOC_REDZONE_SZB 16 /* let's say */ + +// 0 for none, 1 for dump at end of run +#define SHOW_DATA_STRUCTURES 0 + + +/* ------------ Misc comments ------------ */ + +// FIXME: don't hardwire initial entries for root thread. +// Instead, let the pre_thread_ll_create handler do this. + + +/*----------------------------------------------------------------*/ +/*--- Primary data structures ---*/ +/*----------------------------------------------------------------*/ + +/* Admin linked list of Threads */ +static Thread* admin_threads = NULL; + +/* Admin linked list of Locks */ +static Lock* admin_locks = NULL; + +/* Mapping table for core ThreadIds to Thread* */ +static Thread** map_threads = NULL; /* Array[VG_N_THREADS] of Thread* */ + +/* Mapping table for lock guest addresses to Lock* */ +static WordFM* map_locks = NULL; /* WordFM LockAddr Lock* */ + +/* The word-set universes for thread sets and lock sets. */ +static WordSetU* univ_tsets = NULL; /* sets of Thread* */ +static WordSetU* univ_lsets = NULL; /* sets of Lock* */ +static WordSetU* univ_laog = NULL; /* sets of Lock*, for LAOG */ + +/* never changed; we only care about its address. Is treated as if it + was a standard userspace lock. Also we have a Lock* describing it + so it can participate in lock sets in the usual way. 
*/ +static Int __bus_lock = 0; +static Lock* __bus_lock_Lock = NULL; + + +/*----------------------------------------------------------------*/ +/*--- Simple helpers for the data structures ---*/ +/*----------------------------------------------------------------*/ + +static UWord stats__lockN_acquires = 0; +static UWord stats__lockN_releases = 0; + +static +ThreadId map_threads_maybe_reverse_lookup_SLOW ( Thread* thr ); /*fwds*/ + +/* --------- Constructors --------- */ + +static Thread* mk_Thread ( Thr* hbthr ) { + static Int indx = 1; + Thread* thread = HG_(zalloc)( "hg.mk_Thread.1", sizeof(Thread) ); + thread->locksetA = HG_(emptyWS)( univ_lsets ); + thread->locksetW = HG_(emptyWS)( univ_lsets ); + thread->magic = Thread_MAGIC; + thread->hbthr = hbthr; + thread->coretid = VG_INVALID_THREADID; + thread->created_at = NULL; + thread->announced = False; + thread->errmsg_index = indx++; + thread->admin = admin_threads; + admin_threads = thread; + return thread; +} + +// Make a new lock which is unlocked (hence ownerless) +static Lock* mk_LockN ( LockKind kind, Addr guestaddr ) { + static ULong unique = 0; + Lock* lock = HG_(zalloc)( "hg.mk_Lock.1", sizeof(Lock) ); + lock->admin = admin_locks; + lock->unique = unique++; + lock->magic = LockN_MAGIC; + lock->appeared_at = NULL; + lock->acquired_at = NULL; + lock->hbso = libhb_so_alloc(); + lock->guestaddr = guestaddr; + lock->kind = kind; + lock->heldW = False; + lock->heldBy = NULL; + tl_assert(HG_(is_sane_LockN)(lock)); + admin_locks = lock; + return lock; +} + +/* Release storage for a Lock. Also release storage in .heldBy, if + any. */ +static void del_LockN ( Lock* lk ) +{ + tl_assert(HG_(is_sane_LockN)(lk)); + tl_assert(lk->hbso); + libhb_so_dealloc(lk->hbso); + if (lk->heldBy) + VG_(deleteBag)( lk->heldBy ); + VG_(memset)(lk, 0xAA, sizeof(*lk)); + HG_(free)(lk); +} + +/* Update 'lk' to reflect that 'thr' now has a write-acquisition of + it. This is done strictly: only combinations resulting from + correct program and libpthread behaviour are allowed. */ +static void lockN_acquire_writer ( Lock* lk, Thread* thr ) +{ + tl_assert(HG_(is_sane_LockN)(lk)); + tl_assert(HG_(is_sane_Thread)(thr)); + + stats__lockN_acquires++; + + /* EXPOSITION only */ + /* We need to keep recording snapshots of where the lock was + acquired, so as to produce better lock-order error messages. */ + if (lk->acquired_at == NULL) { + ThreadId tid; + tl_assert(lk->heldBy == NULL); + tid = map_threads_maybe_reverse_lookup_SLOW(thr); + lk->acquired_at + = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/); + } else { + tl_assert(lk->heldBy != NULL); + } + /* end EXPOSITION only */ + + switch (lk->kind) { + case LK_nonRec: + case_LK_nonRec: + tl_assert(lk->heldBy == NULL); /* can't w-lock recursively */ + tl_assert(!lk->heldW); + lk->heldW = True; + lk->heldBy = VG_(newBag)( HG_(zalloc), "hg.lNaw.1", HG_(free) ); + VG_(addToBag)( lk->heldBy, (Word)thr ); + break; + case LK_mbRec: + if (lk->heldBy == NULL) + goto case_LK_nonRec; + /* 2nd and subsequent locking of a lock by its owner */ + tl_assert(lk->heldW); + /* assert: lk is only held by one thread .. */ + tl_assert(VG_(sizeUniqueBag(lk->heldBy)) == 1); + /* assert: .. and that thread is 'thr'. 
*/ + tl_assert(VG_(elemBag)(lk->heldBy, (Word)thr) + == VG_(sizeTotalBag)(lk->heldBy)); + VG_(addToBag)(lk->heldBy, (Word)thr); + break; + case LK_rdwr: + tl_assert(lk->heldBy == NULL && !lk->heldW); /* must be unheld */ + goto case_LK_nonRec; + default: + tl_assert(0); + } + tl_assert(HG_(is_sane_LockN)(lk)); +} + +static void lockN_acquire_reader ( Lock* lk, Thread* thr ) +{ + tl_assert(HG_(is_sane_LockN)(lk)); + tl_assert(HG_(is_sane_Thread)(thr)); + /* can only add reader to a reader-writer lock. */ + tl_assert(lk->kind == LK_rdwr); + /* lk must be free or already r-held. */ + tl_assert(lk->heldBy == NULL + || (lk->heldBy != NULL && !lk->heldW)); + + stats__lockN_acquires++; + + /* EXPOSITION only */ + /* We need to keep recording snapshots of where the lock was + acquired, so as to produce better lock-order error messages. */ + if (lk->acquired_at == NULL) { + ThreadId tid; + tl_assert(lk->heldBy == NULL); + tid = map_threads_maybe_reverse_lookup_SLOW(thr); + lk->acquired_at + = VG_(record_ExeContext)(tid, 0/*first_ip_delta*/); + } else { + tl_assert(lk->heldBy != NULL); + } + /* end EXPOSITION only */ + + if (lk->heldBy) { + VG_(addToBag)(lk->heldBy, (Word)thr); + } else { + lk->heldW = False; + lk->heldBy = VG_(newBag)( HG_(zalloc), "hg.lNar.1", HG_(free) ); + VG_(addToBag)( lk->heldBy, (Word)thr ); + } + tl_assert(!lk->heldW); + tl_assert(HG_(is_sane_LockN)(lk)); +} + +/* Update 'lk' to reflect a release of it by 'thr'. This is done + strictly: only combinations resulting from correct program and + libpthread behaviour are allowed. */ + +static void lockN_release ( Lock* lk, Thread* thr ) +{ + Bool b; + tl_assert(HG_(is_sane_LockN)(lk)); + tl_assert(HG_(is_sane_Thread)(thr)); + /* lock must be held by someone */ + tl_assert(lk->heldBy); + stats__lockN_releases++; + /* Remove it from the holder set */ + b = VG_(delFromBag)(lk->heldBy, (Word)thr); + /* thr must actually have been a holder of lk */ + tl_assert(b); + /* normalise */ + tl_assert(lk->acquired_at); + if (VG_(isEmptyBag)(lk->heldBy)) { + VG_(deleteBag)(lk->heldBy); + lk->heldBy = NULL; + lk->heldW = False; + lk->acquired_at = NULL; + } + tl_assert(HG_(is_sane_LockN)(lk)); +} + +static void remove_Lock_from_locksets_of_all_owning_Threads( Lock* lk ) +{ + Thread* thr; + if (!lk->heldBy) { + tl_assert(!lk->heldW); + return; + } + /* for each thread that holds this lock do ... 
*/ + VG_(initIterBag)( lk->heldBy ); + while (VG_(nextIterBag)( lk->heldBy, (Word*)&thr, NULL )) { + tl_assert(HG_(is_sane_Thread)(thr)); + tl_assert(HG_(elemWS)( univ_lsets, + thr->locksetA, (Word)lk )); + thr->locksetA + = HG_(delFromWS)( univ_lsets, thr->locksetA, (Word)lk ); + + if (lk->heldW) { + tl_assert(HG_(elemWS)( univ_lsets, + thr->locksetW, (Word)lk )); + thr->locksetW + = HG_(delFromWS)( univ_lsets, thr->locksetW, (Word)lk ); + } + } + VG_(doneIterBag)( lk->heldBy ); +} + + +/*----------------------------------------------------------------*/ +/*--- Print out the primary data structures ---*/ +/*----------------------------------------------------------------*/ + +//static WordSetID del_BHL ( WordSetID lockset ); /* fwds */ + +#define PP_THREADS (1<<1) +#define PP_LOCKS (1<<2) +#define PP_ALL (PP_THREADS | PP_LOCKS) + + +static const Int sHOW_ADMIN = 0; + +static void space ( Int n ) +{ + Int i; + Char spaces[128+1]; + tl_assert(n >= 0 && n < 128); + if (n == 0) + return; + for (i = 0; i < n; i++) + spaces[i] = ' '; + spaces[i] = 0; + tl_assert(i < 128+1); + VG_(printf)("%s", spaces); +} + +static void pp_Thread ( Int d, Thread* t ) +{ + space(d+0); VG_(printf)("Thread %p {\n", t); + if (sHOW_ADMIN) { + space(d+3); VG_(printf)("admin %p\n", t->admin); + space(d+3); VG_(printf)("magic 0x%x\n", (UInt)t->magic); + } + space(d+3); VG_(printf)("locksetA %d\n", (Int)t->locksetA); + space(d+3); VG_(printf)("locksetW %d\n", (Int)t->locksetW); + space(d+0); VG_(printf)("}\n"); +} + +static void pp_admin_threads ( Int d ) +{ + Int i, n; + Thread* t; + for (n = 0, t = admin_threads; t; n++, t = t->admin) { + /* nothing */ + } + space(d); VG_(printf)("admin_threads (%d records) {\n", n); + for (i = 0, t = admin_threads; t; i++, t = t->admin) { + if (0) { + space(n); + VG_(printf)("admin_threads record %d of %d:\n", i, n); + } + pp_Thread(d+3, t); + } + space(d); VG_(printf)("}\n"); +} + +static void pp_map_threads ( Int d ) +{ + Int i, n = 0; + space(d); VG_(printf)("map_threads "); + for (i = 0; i < VG_N_THREADS; i++) { + if (map_threads[i] != NULL) + n++; + } + VG_(printf)("(%d entries) {\n", n); + for (i = 0; i < VG_N_THREADS; i++) { + if (map_threads[i] == NULL) + continue; + space(d+3); + VG_(printf)("coretid %d -> Thread %p\n", i, map_threads[i]); + } + space(d); VG_(printf)("}\n"); +} + +static const HChar* show_LockKind ( LockKind lkk ) { + switch (lkk) { + case LK_mbRec: return "mbRec"; + case LK_nonRec: return "nonRec"; + case LK_rdwr: return "rdwr"; + default: tl_assert(0); + } +} + +static void pp_Lock ( Int d, Lock* lk ) +{ + space(d+0); VG_(printf)("Lock %p (ga %#lx) {\n", lk, lk->guestaddr); + if (sHOW_ADMIN) { + space(d+3); VG_(printf)("admin %p\n", lk->admin); + space(d+3); VG_(printf)("magic 0x%x\n", (UInt)lk->magic); + } + space(d+3); VG_(printf)("unique %llu\n", lk->unique); + space(d+3); VG_(printf)("kind %s\n", show_LockKind(lk->kind)); + space(d+3); VG_(printf)("heldW %s\n", lk->heldW ? 
"yes" : "no"); + space(d+3); VG_(printf)("heldBy %p", lk->heldBy); + if (lk->heldBy) { + Thread* thr; + Word count; + VG_(printf)(" { "); + VG_(initIterBag)( lk->heldBy ); + while (VG_(nextIterBag)( lk->heldBy, (Word*)&thr, &count )) + VG_(printf)("%lu:%p ", count, thr); + VG_(doneIterBag)( lk->heldBy ); + VG_(printf)("}"); + } + VG_(printf)("\n"); + space(d+0); VG_(printf)("}\n"); +} + +static void pp_admin_locks ( Int d ) +{ + Int i, n; + Lock* lk; + for (n = 0, lk = admin_locks; lk; n++, lk = lk->admin) { + /* nothing */ + } + space(d); VG_(printf)("admin_locks (%d records) {\n", n); + for (i = 0, lk = admin_locks; lk; i++, lk = lk->admin) { + if (0) { + space(n); + VG_(printf)("admin_locks record %d of %d:\n", i, n); + } + pp_Lock(d+3, lk); + } + space(d); VG_(printf)("}\n"); +} + +static void pp_map_locks ( Int d ) +{ + void* gla; + Lock* lk; + space(d); VG_(printf)("map_locks (%d entries) {\n", + (Int)VG_(sizeFM)( map_locks )); + VG_(initIterFM)( map_locks ); + while (VG_(nextIterFM)( map_locks, (Word*)&gla, + (Word*)&lk )) { + space(d+3); + VG_(printf)("guest %p -> Lock %p\n", gla, lk); + } + VG_(doneIterFM)( map_locks ); + space(d); VG_(printf)("}\n"); +} + +static void pp_everything ( Int flags, Char* caller ) +{ + Int d = 0; + VG_(printf)("\n"); + VG_(printf)("All_Data_Structures (caller = \"%s\") {\n", caller); + if (flags & PP_THREADS) { + VG_(printf)("\n"); + pp_admin_threads(d+3); + VG_(printf)("\n"); + pp_map_threads(d+3); + } + if (flags & PP_LOCKS) { + VG_(printf)("\n"); + pp_admin_locks(d+3); + VG_(printf)("\n"); + pp_map_locks(d+3); + } + + VG_(printf)("\n"); + VG_(printf)("}\n"); + VG_(printf)("\n"); +} + +#undef SHOW_ADMIN + + +/*----------------------------------------------------------------*/ +/*--- Initialise the primary data structures ---*/ +/*----------------------------------------------------------------*/ + +static void initialise_data_structures ( Thr* hbthr_root ) +{ + Thread* thr; + + /* Get everything initialised and zeroed. */ + tl_assert(admin_threads == NULL); + tl_assert(admin_locks == NULL); + + tl_assert(sizeof(Addr) == sizeof(Word)); + + tl_assert(map_threads == NULL); + map_threads = HG_(zalloc)( "hg.ids.1", VG_N_THREADS * sizeof(Thread*) ); + tl_assert(map_threads != NULL); + + tl_assert(sizeof(Addr) == sizeof(Word)); + tl_assert(map_locks == NULL); + map_locks = VG_(newFM)( HG_(zalloc), "hg.ids.2", HG_(free), + NULL/*unboxed Word cmp*/); + tl_assert(map_locks != NULL); + + __bus_lock_Lock = mk_LockN( LK_nonRec, (Addr)&__bus_lock ); + tl_assert(HG_(is_sane_LockN)(__bus_lock_Lock)); + VG_(addToFM)( map_locks, (Word)&__bus_lock, (Word)__bus_lock_Lock ); + + tl_assert(univ_tsets == NULL); + univ_tsets = HG_(newWordSetU)( HG_(zalloc), "hg.ids.3", HG_(free), + 8/*cacheSize*/ ); + tl_assert(univ_tsets != NULL); + + tl_assert(univ_lsets == NULL); + univ_lsets = HG_(newWordSetU)( HG_(zalloc), "hg.ids.4", HG_(free), + 8/*cacheSize*/ ); + tl_assert(univ_lsets != NULL); + + tl_assert(univ_laog == NULL); + univ_laog = HG_(newWordSetU)( HG_(zalloc), "hg.ids.5 (univ_laog)", + HG_(free), 24/*cacheSize*/ ); + tl_assert(univ_laog != NULL); + + /* Set up entries for the root thread */ + // FIXME: this assumes that the first real ThreadId is 1 + + /* a Thread for the new thread ... */ + thr = mk_Thread(hbthr_root); + thr->coretid = 1; /* FIXME: hardwires an assumption about the + identity of the root thread. */ + tl_assert( libhb_get_Thr_opaque(hbthr_root) == NULL ); + libhb_set_Thr_opaque(hbthr_root, thr); + + /* and bind it in the thread-map table. 
*/ + tl_assert(HG_(is_sane_ThreadId)(thr->coretid)); + tl_assert(thr->coretid != VG_INVALID_THREADID); + + map_threads[thr->coretid] = thr; + + tl_assert(VG_INVALID_THREADID == 0); + + /* Mark the new bus lock correctly (to stop the sanity checks + complaining) */ + tl_assert( sizeof(__bus_lock) == 4 ); + + all__sanity_check("initialise_data_structures"); +} + + +/*----------------------------------------------------------------*/ +/*--- map_threads :: array[core-ThreadId] of Thread* ---*/ +/*----------------------------------------------------------------*/ + +/* Doesn't assert if the relevant map_threads entry is NULL. */ +static Thread* map_threads_maybe_lookup ( ThreadId coretid ) +{ + Thread* thr; + tl_assert( HG_(is_sane_ThreadId)(coretid) ); + thr = map_threads[coretid]; + return thr; +} + +/* Asserts if the relevant map_threads entry is NULL. */ +static inline Thread* map_threads_lookup ( ThreadId coretid ) +{ + Thread* thr; + tl_assert( HG_(is_sane_ThreadId)(coretid) ); + thr = map_threads[coretid]; + tl_assert(thr); + return thr; +} + +/* Do a reverse lookup. Does not assert if 'thr' is not found in + map_threads. */ +static ThreadId map_threads_maybe_reverse_lookup_SLOW ( Thread* thr ) +{ + ThreadId tid; + tl_assert(HG_(is_sane_Thread)(thr)); + /* Check nobody used the invalid-threadid slot */ + tl_assert(VG_INVALID_THREADID >= 0 && VG_INVALID_THREADID < VG_N_THREADS); + tl_assert(map_threads[VG_INVALID_THREADID] == NULL); + tid = thr->coretid; + tl_assert(HG_(is_sane_ThreadId)(tid)); + return tid; +} + +/* Do a reverse lookup. Warning: POTENTIALLY SLOW. Asserts if 'thr' + is not found in map_threads. */ +static ThreadId map_threads_reverse_lookup_SLOW ( Thread* thr ) +{ + ThreadId tid = map_threads_maybe_reverse_lookup_SLOW( thr ); + tl_assert(tid != VG_INVALID_THREADID); + tl_assert(map_threads[tid]); + tl_assert(map_threads[tid]->coretid == tid); + return tid; +} + +static void map_threads_delete ( ThreadId coretid ) +{ + Thread* thr; + tl_assert(coretid != 0); + tl_assert( HG_(is_sane_ThreadId)(coretid) ); + thr = map_threads[coretid]; + tl_assert(thr); + map_threads[coretid] = NULL; +} + + +/*----------------------------------------------------------------*/ +/*--- map_locks :: WordFM guest-Addr-of-lock Lock* ---*/ +/*----------------------------------------------------------------*/ + +/* Make sure there is a lock table entry for the given (lock) guest + address. If not, create one of the stated 'kind' in unheld state. + In any case, return the address of the existing or new Lock. */ +static +Lock* map_locks_lookup_or_create ( LockKind lkk, Addr ga, ThreadId tid ) +{ + Bool found; + Lock* oldlock = NULL; + tl_assert(HG_(is_sane_ThreadId)(tid)); + found = VG_(lookupFM)( map_locks, + NULL, (Word*)&oldlock, (Word)ga ); + if (!found) { + Lock* lock = mk_LockN(lkk, ga); + lock->appeared_at = VG_(record_ExeContext)( tid, 0 ); + tl_assert(HG_(is_sane_LockN)(lock)); + VG_(addToFM)( map_locks, (Word)ga, (Word)lock ); + tl_assert(oldlock == NULL); + return lock; + } else { + tl_assert(oldlock != NULL); + tl_assert(HG_(is_sane_LockN)(oldlock)); + tl_assert(oldlock->guestaddr == ga); + return oldlock; + } +} + +static Lock* map_locks_maybe_lookup ( Addr ga ) +{ + Bool found; + Lock* lk = NULL; + found = VG_(lookupFM)( map_locks, NULL, (Word*)&lk, (Word)ga ); + tl_assert(found ? 
lk != NULL : lk == NULL); + return lk; +} + +static void map_locks_delete ( Addr ga ) +{ + Addr ga2 = 0; + Lock* lk = NULL; + VG_(delFromFM)( map_locks, + (Word*)&ga2, (Word*)&lk, (Word)ga ); + /* delFromFM produces the val which is being deleted, if it is + found. So assert it is non-null; that in effect asserts that we + are deleting a (ga, Lock) pair which actually exists. */ + tl_assert(lk != NULL); + tl_assert(ga2 == ga); +} + + + +/*----------------------------------------------------------------*/ +/*--- Sanity checking the data structures ---*/ +/*----------------------------------------------------------------*/ + +static UWord stats__sanity_checks = 0; + +static void laog__sanity_check ( Char* who ); /* fwds */ + +/* REQUIRED INVARIANTS: + + Thread vs Segment/Lock/SecMaps + + for each t in Threads { + + // Thread.lockset: each element is really a valid Lock + + // Thread.lockset: each Lock in set is actually held by that thread + for lk in Thread.lockset + lk == LockedBy(t) + + // Thread.csegid is a valid SegmentID + // and the associated Segment has .thr == t + + } + + all thread Locksets are pairwise empty under intersection + (that is, no lock is claimed to be held by more than one thread) + -- this is guaranteed if all locks in locksets point back to their + owner threads + + Lock vs Thread/Segment/SecMaps + + for each entry (gla, la) in map_locks + gla == la->guest_addr + + for each lk in Locks { + + lk->tag is valid + lk->guest_addr does not have shadow state NoAccess + if lk == LockedBy(t), then t->lockset contains lk + if lk == UnlockedBy(segid) then segid is valid SegmentID + and can be mapped to a valid Segment(seg) + and seg->thr->lockset does not contain lk + if lk == UnlockedNew then (no lockset contains lk) + + secmaps for lk has .mbHasLocks == True + + } + + Segment vs Thread/Lock/SecMaps + + the Segment graph is a dag (no cycles) + all of the Segment graph must be reachable from the segids + mentioned in the Threads + + for seg in Segments { + + seg->thr is a sane Thread + + } + + SecMaps vs Segment/Thread/Lock + + for sm in SecMaps { + + sm properly aligned + if any shadow word is ShR or ShM then .mbHasShared == True + + for each Excl(segid) state + map_segments_lookup maps to a sane Segment(seg) + for each ShM/ShR(tsetid,lsetid) state + each lk in lset is a valid Lock + each thr in tset is a valid thread, which is non-dead + + } +*/ + + +/* Return True iff 'thr' holds 'lk' in some mode. 
*/ +static Bool thread_is_a_holder_of_Lock ( Thread* thr, Lock* lk ) +{ + if (lk->heldBy) + return VG_(elemBag)( lk->heldBy, (Word)thr ) > 0; + else + return False; +} + +/* Sanity check Threads, as far as possible */ +__attribute__((noinline)) +static void threads__sanity_check ( Char* who ) +{ +#define BAD(_str) do { how = (_str); goto bad; } while (0) + Char* how = "no error"; + Thread* thr; + WordSetID wsA, wsW; + UWord* ls_words; + Word ls_size, i; + Lock* lk; + for (thr = admin_threads; thr; thr = thr->admin) { + if (!HG_(is_sane_Thread)(thr)) BAD("1"); + wsA = thr->locksetA; + wsW = thr->locksetW; + // locks held in W mode are a subset of all locks held + if (!HG_(isSubsetOf)( univ_lsets, wsW, wsA )) BAD("7"); + HG_(getPayloadWS)( &ls_words, &ls_size, univ_lsets, wsA ); + for (i = 0; i < ls_size; i++) { + lk = (Lock*)ls_words[i]; + // Thread.lockset: each element is really a valid Lock + if (!HG_(is_sane_LockN)(lk)) BAD("2"); + // Thread.lockset: each Lock in set is actually held by that + // thread + if (!thread_is_a_holder_of_Lock(thr,lk)) BAD("3"); + } + } + return; + bad: + VG_(printf)("threads__sanity_check: who=\"%s\", bad=\"%s\"\n", who, how); + tl_assert(0); +#undef BAD +} + + +/* Sanity check Locks, as far as possible */ +__attribute__((noinline)) +static void locks__sanity_check ( Char* who ) +{ +#define BAD(_str) do { how = (_str); goto bad; } while (0) + Char* how = "no error"; + Addr gla; + Lock* lk; + Int i; + // # entries in admin_locks == # entries in map_locks + for (i = 0, lk = admin_locks; lk; i++, lk = lk->admin) + ; + if (i != VG_(sizeFM)(map_locks)) BAD("1"); + // for each entry (gla, lk) in map_locks + // gla == lk->guest_addr + VG_(initIterFM)( map_locks ); + while (VG_(nextIterFM)( map_locks, + (Word*)&gla, (Word*)&lk )) { + if (lk->guestaddr != gla) BAD("2"); + } + VG_(doneIterFM)( map_locks ); + // scan through admin_locks ... + for (lk = admin_locks; lk; lk = lk->admin) { + // lock is sane. Quite comprehensive, also checks that + // referenced (holder) threads are sane. + if (!HG_(is_sane_LockN)(lk)) BAD("3"); + // map_locks binds guest address back to this lock + if (lk != map_locks_maybe_lookup(lk->guestaddr)) BAD("4"); + // look at all threads mentioned as holders of this lock. Ensure + // this lock is mentioned in their locksets. + if (lk->heldBy) { + Thread* thr; + Word count; + VG_(initIterBag)( lk->heldBy ); + while (VG_(nextIterBag)( lk->heldBy, + (Word*)&thr, &count )) { + // HG_(is_sane_LockN) above ensures these + tl_assert(count >= 1); + tl_assert(HG_(is_sane_Thread)(thr)); + if (!HG_(elemWS)(univ_lsets, thr->locksetA, (Word)lk)) + BAD("6"); + // also check the w-only lockset + if (lk->heldW + && !HG_(elemWS)(univ_lsets, thr->locksetW, (Word)lk)) + BAD("7"); + if ((!lk->heldW) + && HG_(elemWS)(univ_lsets, thr->locksetW, (Word)lk)) + BAD("8"); + } + VG_(doneIterBag)( lk->heldBy ); + } else { + /* lock not held by anybody */ + if (lk->heldW) BAD("9"); /* should be False if !heldBy */ + // since lk is unheld, then (no lockset contains lk) + // hmm, this is really too expensive to check. Hmm. 
+ } + } + + return; + bad: + VG_(printf)("locks__sanity_check: who=\"%s\", bad=\"%s\"\n", who, how); + tl_assert(0); +#undef BAD +} + + +static void all_except_Locks__sanity_check ( Char* who ) { + stats__sanity_checks++; + if (0) VG_(printf)("all_except_Locks__sanity_check(%s)\n", who); + threads__sanity_check(who); + laog__sanity_check(who); +} +static void all__sanity_check ( Char* who ) { + all_except_Locks__sanity_check(who); + locks__sanity_check(who); +} + + +/*----------------------------------------------------------------*/ +/*--- the core memory state machine (msm__* functions) ---*/ +/*----------------------------------------------------------------*/ + +//static WordSetID add_BHL ( WordSetID lockset ) { +// return HG_(addToWS)( univ_lsets, lockset, (Word)__bus_lock_Lock ); +//} +//static WordSetID del_BHL ( WordSetID lockset ) { +// return HG_(delFromWS)( univ_lsets, lockset, (Word)__bus_lock_Lock ); +//} + + +///* Last-lock-lossage records. This mechanism exists to help explain +// to programmers why we are complaining about a race. The idea is to +// monitor all lockset transitions. When a previously nonempty +// lockset becomes empty, the lock(s) that just disappeared (the +// "lossage") are the locks that have consistently protected the +// location (ga_of_access) in question for the longest time. Most of +// the time the lossage-set is a single lock. Because the +// lossage-lock is the one that has survived longest, there is there +// is a good chance that it is indeed the lock that the programmer +// intended to use to protect the location. +// +// Note that we cannot in general just look at the lossage set when we +// see a transition to ShM(...,empty-set), because a transition to an +// empty lockset can happen arbitrarily far before the point where we +// want to report an error. This is in the case where there are many +// transitions ShR -> ShR, all with an empty lockset, and only later +// is there a transition to ShM. So what we want to do is note the +// lossage lock at the point where a ShR -> ShR transition empties out +// the lockset, so we can present it later if there should be a +// transition to ShM. +// +// So this function finds such transitions. For each, it associates +// in ga_to_lastlock, the guest address and the lossage lock. In fact +// we do not record the Lock* directly as that may disappear later, +// but instead the ExeContext inside the Lock which says where it was +// initialised or first locked. ExeContexts are permanent so keeping +// them indefinitely is safe. +// +// A boring detail: the hardware bus lock is not interesting in this +// respect, so we first remove that from the pre/post locksets. +//*/ +// +//static UWord stats__ga_LL_adds = 0; +// +//static WordFM* ga_to_lastlock = NULL; /* GuestAddr -> ExeContext* */ +// +//static +//void record_last_lock_lossage ( Addr ga_of_access, +// WordSetID lset_old, WordSetID lset_new ) +//{ +// Lock* lk; +// Int card_old, card_new; +// +// tl_assert(lset_old != lset_new); +// +// if (0) VG_(printf)("XX1: %d (card %ld) -> %d (card %ld) %#lx\n", +// (Int)lset_old, +// HG_(cardinalityWS)(univ_lsets,lset_old), +// (Int)lset_new, +// HG_(cardinalityWS)(univ_lsets,lset_new), +// ga_of_access ); +// +// /* This is slow, but at least it's simple. The bus hardware lock +// just confuses the logic, so remove it from the locksets we're +// considering before doing anything else. 
*/ +// lset_new = del_BHL( lset_new ); +// +// if (!HG_(isEmptyWS)( univ_lsets, lset_new )) { +// /* The post-transition lock set is not empty. So we are not +// interested. We're only interested in spotting transitions +// that make locksets become empty. */ +// return; +// } +// +// /* lset_new is now empty */ +// card_new = HG_(cardinalityWS)( univ_lsets, lset_new ); +// tl_assert(card_new == 0); +// +// lset_old = del_BHL( lset_old ); +// card_old = HG_(cardinalityWS)( univ_lsets, lset_old ); +// +// if (0) VG_(printf)(" X2: %d (card %d) -> %d (card %d)\n", +// (Int)lset_old, card_old, (Int)lset_new, card_new ); +// +// if (card_old == 0) { +// /* The old lockset was also empty. Not interesting. */ +// return; +// } +// +// tl_assert(card_old > 0); +// tl_assert(!HG_(isEmptyWS)( univ_lsets, lset_old )); +// +// /* Now we know we've got a transition from a nonempty lockset to an +// empty one. So lset_old must be the set of locks lost. Record +// some details. If there is more than one element in the lossage +// set, just choose one arbitrarily -- not the best, but at least +// it's simple. */ +// +// lk = (Lock*)HG_(anyElementOfWS)( univ_lsets, lset_old ); +// if (0) VG_(printf)("lossage %ld %p\n", +// HG_(cardinalityWS)( univ_lsets, lset_old), lk ); +// if (lk->appeared_at) { +// if (ga_to_lastlock == NULL) +// ga_to_lastlock = VG_(newFM)( HG_(zalloc), "hg.rlll.1", HG_(free), NULL ); +// VG_(addToFM)( ga_to_lastlock, ga_of_access, (Word)lk->appeared_at ); +// stats__ga_LL_adds++; +// } +//} +// +///* This queries the table (ga_to_lastlock) made by +// record_last_lock_lossage, when constructing error messages. It +// attempts to find the ExeContext of the allocation or initialisation +// point for the lossage lock associated with 'ga'. */ +// +//static ExeContext* maybe_get_lastlock_initpoint ( Addr ga ) +//{ +// ExeContext* ec_hint = NULL; +// if (ga_to_lastlock != NULL +// && VG_(lookupFM)(ga_to_lastlock, +// NULL, (Word*)&ec_hint, ga)) { +// tl_assert(ec_hint != NULL); +// return ec_hint; +// } else { +// return NULL; +// } +//} + + +/*----------------------------------------------------------------*/ +/*--- Shadow value and address range handlers ---*/ +/*----------------------------------------------------------------*/ + +static void laog__pre_thread_acquires_lock ( Thread*, Lock* ); /* fwds */ +//static void laog__handle_lock_deletions ( WordSetID ); /* fwds */ +static inline Thread* get_current_Thread ( void ); /* fwds */ +__attribute__((noinline)) +static void laog__handle_one_lock_deletion ( Lock* lk ); /* fwds */ + + +/* Block-copy states (needed for implementing realloc()). 
*/ +static void shadow_mem_copy_range ( Addr src, Addr dst, SizeT len ) +{ + libhb_copy_shadow_state( src, dst, len ); +} + +static void shadow_mem_read_range ( Thread* thr, Addr a, SizeT len ) +{ + Thr* hbthr = thr->hbthr; + tl_assert(hbthr); + LIBHB_READ_N(hbthr, a, len); +} + +static void shadow_mem_write_range ( Thread* thr, Addr a, SizeT len ) { + Thr* hbthr = thr->hbthr; + tl_assert(hbthr); + LIBHB_WRITE_N(hbthr, a, len); +} + +static void shadow_mem_make_New ( Thread* thr, Addr a, SizeT len ) +{ + libhb_range_new( thr->hbthr, a, len ); +} + +static void shadow_mem_make_NoAccess ( Thread* thr, Addr aIN, SizeT len ) +{ + if (0 && len > 500) + VG_(printf)("make NoAccess ( %#lx, %ld )\n", aIN, len ); + libhb_range_noaccess( thr->hbthr, aIN, len ); +} + + +/*----------------------------------------------------------------*/ +/*--- Event handlers (evh__* functions) ---*/ +/*--- plus helpers (evhH__* functions) ---*/ +/*----------------------------------------------------------------*/ + +/*--------- Event handler helpers (evhH__* functions) ---------*/ + +/* Create a new segment for 'thr', making it depend (.prev) on its + existing segment, bind together the SegmentID and Segment, and + return both of them. Also update 'thr' so it references the new + Segment. */ +//zz static +//zz void evhH__start_new_segment_for_thread ( /*OUT*/SegmentID* new_segidP, +//zz /*OUT*/Segment** new_segP, +//zz Thread* thr ) +//zz { +//zz Segment* cur_seg; +//zz tl_assert(new_segP); +//zz tl_assert(new_segidP); +//zz tl_assert(HG_(is_sane_Thread)(thr)); +//zz cur_seg = map_segments_lookup( thr->csegid ); +//zz tl_assert(cur_seg); +//zz tl_assert(cur_seg->thr == thr); /* all sane segs should point back +//zz at their owner thread. */ +//zz *new_segP = mk_Segment( thr, cur_seg, NULL/*other*/ ); +//zz *new_segidP = alloc_SegmentID(); +//zz map_segments_add( *new_segidP, *new_segP ); +//zz thr->csegid = *new_segidP; +//zz } + + +/* The lock at 'lock_ga' has acquired a writer. Make all necessary + updates, and also do all possible error checks. */ +static +void evhH__post_thread_w_acquires_lock ( Thread* thr, + LockKind lkk, Addr lock_ga ) +{ + Lock* lk; + + /* Basically what we need to do is call lockN_acquire_writer. + However, that will barf if any 'invalid' lock states would + result. Therefore check before calling. Side effect is that + 'HG_(is_sane_LockN)(lk)' is both a pre- and post-condition of this + routine. + + Because this routine is only called after successful lock + acquisition, we should not be asked to move the lock into any + invalid states. Requests to do so are bugs in libpthread, since + that should have rejected any such requests. */ + + tl_assert(HG_(is_sane_Thread)(thr)); + /* Try to find the lock. If we can't, then create a new one with + kind 'lkk'. */ + lk = map_locks_lookup_or_create( + lkk, lock_ga, map_threads_reverse_lookup_SLOW(thr) ); + tl_assert( HG_(is_sane_LockN)(lk) ); + + /* check libhb level entities exist */ + tl_assert(thr->hbthr); + tl_assert(lk->hbso); + + if (lk->heldBy == NULL) { + /* the lock isn't held. Simple. */ + tl_assert(!lk->heldW); + lockN_acquire_writer( lk, thr ); + /* acquire a dependency from the lock's VCs */ + libhb_so_recv( thr->hbthr, lk->hbso, True/*strong_recv*/ ); + goto noerror; + } + + /* So the lock is already held. If held as a r-lock then + libpthread must be buggy. 
*/ + tl_assert(lk->heldBy); + if (!lk->heldW) { + HG_(record_error_Misc)( + thr, "Bug in libpthread: write lock " + "granted on rwlock which is currently rd-held"); + goto error; + } + + /* So the lock is held in w-mode. If it's held by some other + thread, then libpthread must be buggy. */ + tl_assert(VG_(sizeUniqueBag)(lk->heldBy) == 1); /* from precondition */ + + if (thr != (Thread*)VG_(anyElementOfBag)(lk->heldBy)) { + HG_(record_error_Misc)( + thr, "Bug in libpthread: write lock " + "granted on mutex/rwlock which is currently " + "wr-held by a different thread"); + goto error; + } + + /* So the lock is already held in w-mode by 'thr'. That means this + is an attempt to lock it recursively, which is only allowable + for LK_mbRec kinded locks. Since this routine is called only + once the lock has been acquired, this must also be a libpthread + bug. */ + if (lk->kind != LK_mbRec) { + HG_(record_error_Misc)( + thr, "Bug in libpthread: recursive write lock " + "granted on mutex/wrlock which does not " + "support recursion"); + goto error; + } + + /* So we are recursively re-locking a lock we already w-hold. */ + lockN_acquire_writer( lk, thr ); + /* acquire a dependency from the lock's VC. Probably pointless, + but also harmless. */ + libhb_so_recv( thr->hbthr, lk->hbso, True/*strong_recv*/ ); + goto noerror; + + noerror: + /* check lock order acquisition graph, and update. This has to + happen before the lock is added to the thread's locksetA/W. */ + laog__pre_thread_acquires_lock( thr, lk ); + /* update the thread's held-locks set */ + thr->locksetA = HG_(addToWS)( univ_lsets, thr->locksetA, (Word)lk ); + thr->locksetW = HG_(addToWS)( univ_lsets, thr->locksetW, (Word)lk ); + /* fall through */ + + error: + tl_assert(HG_(is_sane_LockN)(lk)); +} + + +/* The lock at 'lock_ga' has acquired a reader. Make all necessary + updates, and also do all possible error checks. */ +static +void evhH__post_thread_r_acquires_lock ( Thread* thr, + LockKind lkk, Addr lock_ga ) +{ + Lock* lk; + + /* Basically what we need to do is call lockN_acquire_reader. + However, that will barf if any 'invalid' lock states would + result. Therefore check before calling. Side effect is that + 'HG_(is_sane_LockN)(lk)' is both a pre- and post-condition of this + routine. + + Because this routine is only called after successful lock + acquisition, we should not be asked to move the lock into any + invalid states. Requests to do so are bugs in libpthread, since + that should have rejected any such requests. */ + + tl_assert(HG_(is_sane_Thread)(thr)); + /* Try to find the lock. If we can't, then create a new one with + kind 'lkk'. Only a reader-writer lock can be read-locked, + hence the first assertion. */ + tl_assert(lkk == LK_rdwr); + lk = map_locks_lookup_or_create( + lkk, lock_ga, map_threads_reverse_lookup_SLOW(thr) ); + tl_assert( HG_(is_sane_LockN)(lk) ); + + /* check libhb level entities exist */ + tl_assert(thr->hbthr); + tl_assert(lk->hbso); + + if (lk->heldBy == NULL) { + /* the lock isn't held. Simple. */ + tl_assert(!lk->heldW); + lockN_acquire_reader( lk, thr ); + /* acquire a dependency from the lock's VC */ + libhb_so_recv( thr->hbthr, lk->hbso, False/*!strong_recv*/ ); + goto noerror; + } + + /* So the lock is already held. If held as a w-lock then + libpthread must be buggy. */ + tl_assert(lk->heldBy); + if (lk->heldW) { + HG_(record_error_Misc)( thr, "Bug in libpthread: read lock " + "granted on rwlock which is " + "currently wr-held"); + goto error; + } + + /* Easy enough. 
In short anybody can get a read-lock on a rwlock + provided it is either unlocked or already in rd-held. */ + lockN_acquire_reader( lk, thr ); + /* acquire a dependency from the lock's VC. Probably pointless, + but also harmless. */ + libhb_so_recv( thr->hbthr, lk->hbso, False/*!strong_recv*/ ); + goto noerror; + + noerror: + /* check lock order acquisition graph, and update. This has to + happen before the lock is added to the thread's locksetA/W. */ + laog__pre_thread_acquires_lock( thr, lk ); + /* update the thread's held-locks set */ + thr->locksetA = HG_(addToWS)( univ_lsets, thr->locksetA, (Word)lk ); + /* but don't update thr->locksetW, since lk is only rd-held */ + /* fall through */ + + error: + tl_assert(HG_(is_sane_LockN)(lk)); +} + + +/* The lock at 'lock_ga' is just about to be unlocked. Make all + necessary updates, and also do all possible error checks. */ +static +void evhH__pre_thread_releases_lock ( Thread* thr, + Addr lock_ga, Bool isRDWR ) +{ + Lock* lock; + Word n; + Bool was_heldW; + + /* This routine is called prior to a lock release, before + libpthread has had a chance to validate the call. Hence we need + to detect and reject any attempts to move the lock into an + invalid state. Such attempts are bugs in the client. + + isRDWR is True if we know from the wrapper context that lock_ga + should refer to a reader-writer lock, and is False if [ditto] + lock_ga should refer to a standard mutex. */ + + tl_assert(HG_(is_sane_Thread)(thr)); + lock = map_locks_maybe_lookup( lock_ga ); + + if (!lock) { + /* We know nothing about a lock at 'lock_ga'. Nevertheless + the client is trying to unlock it. So complain, then ignore + the attempt. */ + HG_(record_error_UnlockBogus)( thr, lock_ga ); + return; + } + + tl_assert(lock->guestaddr == lock_ga); + tl_assert(HG_(is_sane_LockN)(lock)); + + if (isRDWR && lock->kind != LK_rdwr) { + HG_(record_error_Misc)( thr, "pthread_rwlock_unlock with a " + "pthread_mutex_t* argument " ); + } + if ((!isRDWR) && lock->kind == LK_rdwr) { + HG_(record_error_Misc)( thr, "pthread_mutex_unlock with a " + "pthread_rwlock_t* argument " ); + } + + if (!lock->heldBy) { + /* The lock is not held. This indicates a serious bug in the + client. */ + tl_assert(!lock->heldW); + HG_(record_error_UnlockUnlocked)( thr, lock ); + tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetA, (Word)lock )); + tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock )); + goto error; + } + + /* test just above dominates */ + tl_assert(lock->heldBy); + was_heldW = lock->heldW; + + /* The lock is held. Is this thread one of the holders? If not, + report a bug in the client. */ + n = VG_(elemBag)( lock->heldBy, (Word)thr ); + tl_assert(n >= 0); + if (n == 0) { + /* We are not a current holder of the lock. This is a bug in + the guest, and (per POSIX pthread rules) the unlock + attempt will fail. So just complain and do nothing + else. */ + Thread* realOwner = (Thread*)VG_(anyElementOfBag)( lock->heldBy ); + tl_assert(HG_(is_sane_Thread)(realOwner)); + tl_assert(realOwner != thr); + tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetA, (Word)lock )); + tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock )); + HG_(record_error_UnlockForeign)( thr, realOwner, lock ); + goto error; + } + + /* Ok, we hold the lock 'n' times. */ + tl_assert(n >= 1); + + lockN_release( lock, thr ); + + n--; + tl_assert(n >= 0); + + if (n > 0) { + tl_assert(lock->heldBy); + tl_assert(n == VG_(elemBag)( lock->heldBy, (Word)thr )); + /* We still hold the lock. 
So either it's a recursive lock + or a rwlock which is currently r-held. */ + tl_assert(lock->kind == LK_mbRec + || (lock->kind == LK_rdwr && !lock->heldW)); + tl_assert(HG_(elemWS)( univ_lsets, thr->locksetA, (Word)lock )); + if (lock->heldW) + tl_assert(HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock )); + else + tl_assert(!HG_(elemWS)( univ_lsets, thr->locksetW, (Word)lock )); + } else { + /* We no longer hold the lock. */ + tl_assert(!lock->heldBy); + tl_assert(lock->heldW == False); + //if (lock->heldBy) { + // tl_assert(0 == VG_(elemBag)( lock->heldBy, (Word)thr )); + //} + /* update this thread's lockset accordingly. */ + thr->locksetA + = HG_(delFromWS)( univ_lsets, thr->locksetA, (Word)lock ); + thr->locksetW + = HG_(delFromWS)( univ_lsets, thr->locksetW, (Word)lock ); + /* push our VC into the lock */ + tl_assert(thr->hbthr); + tl_assert(lock->hbso); + /* If the lock was previously W-held, then we want to do a + strong send, and if previously R-held, then a weak send. */ + libhb_so_send( thr->hbthr, lock->hbso, was_heldW ); + } + /* fall through */ + + error: + tl_assert(HG_(is_sane_LockN)(lock)); +} + + +/* ---------------------------------------------------------- */ +/* -------- Event handlers proper (evh__* functions) -------- */ +/* ---------------------------------------------------------- */ + +/* What is the Thread* for the currently running thread? This is + absolutely performance critical. We receive notifications from the + core for client code starts/stops, and cache the looked-up result + in 'current_Thread'. Hence, for the vast majority of requests, + finding the current thread reduces to a read of a global variable, + provided get_current_Thread_in_C_C is inlined. + + Outside of client code, current_Thread is NULL, and presumably + any uses of it will cause a segfault. Hence: + + - for uses definitely within client code, use + get_current_Thread_in_C_C. + + - for all other uses, use get_current_Thread. +*/ + +static Thread* current_Thread = NULL; + +static void evh__start_client_code ( ThreadId tid, ULong nDisp ) { + if (0) VG_(printf)("start %d %llu\n", (Int)tid, nDisp); + tl_assert(current_Thread == NULL); + current_Thread = map_threads_lookup( tid ); + tl_assert(current_Thread != NULL); +} +static void evh__stop_client_code ( ThreadId tid, ULong nDisp ) { + if (0) VG_(printf)(" stop %d %llu\n", (Int)tid, nDisp); + tl_assert(current_Thread != NULL); + current_Thread = NULL; + libhb_maybe_GC(); +} +static inline Thread* get_current_Thread_in_C_C ( void ) { + return current_Thread; +} +static inline Thread* get_current_Thread ( void ) { + ThreadId coretid; + Thread* thr; + thr = get_current_Thread_in_C_C(); + if (LIKELY(thr)) + return thr; + /* evidently not in client code. Do it the slow way. */ + coretid = VG_(get_running_tid)(); + /* FIXME: get rid of the following kludge. It exists because + evh__new_mem is called during initialisation (as notification + of initial memory layout) and VG_(get_running_tid)() returns + VG_INVALID_THREADID at that point. 
*/ + if (coretid == VG_INVALID_THREADID) + coretid = 1; /* KLUDGE */ + thr = map_threads_lookup( coretid ); + return thr; +} + +static +void evh__new_mem ( Addr a, SizeT len ) { + if (SHOW_EVENTS >= 2) + VG_(printf)("evh__new_mem(%p, %lu)\n", (void*)a, len ); + shadow_mem_make_New( get_current_Thread(), a, len ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__new_mem-post"); +} + +static +void evh__new_mem_w_tid ( Addr a, SizeT len, ThreadId tid ) { + if (SHOW_EVENTS >= 2) + VG_(printf)("evh__new_mem_w_tid(%p, %lu)\n", (void*)a, len ); + shadow_mem_make_New( get_current_Thread(), a, len ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__new_mem_w_tid-post"); +} + +static +void evh__new_mem_w_perms ( Addr a, SizeT len, + Bool rr, Bool ww, Bool xx, ULong di_handle ) { + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__new_mem_w_perms(%p, %lu, %d,%d,%d)\n", + (void*)a, len, (Int)rr, (Int)ww, (Int)xx ); + if (rr || ww || xx) + shadow_mem_make_New( get_current_Thread(), a, len ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__new_mem_w_perms-post"); +} + +static +void evh__set_perms ( Addr a, SizeT len, + Bool rr, Bool ww, Bool xx ) { + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__set_perms(%p, %lu, %d,%d,%d)\n", + (void*)a, len, (Int)rr, (Int)ww, (Int)xx ); + /* Hmm. What should we do here, that actually makes any sense? + Let's say: if neither readable nor writable, then declare it + NoAccess, else leave it alone. */ + if (!(rr || ww)) + shadow_mem_make_NoAccess( get_current_Thread(), a, len ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__set_perms-post"); +} + +static +void evh__die_mem ( Addr a, SizeT len ) { + if (SHOW_EVENTS >= 2) + VG_(printf)("evh__die_mem(%p, %lu)\n", (void*)a, len ); + shadow_mem_make_NoAccess( get_current_Thread(), a, len ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__die_mem-post"); +} + +static +void evh__pre_thread_ll_create ( ThreadId parent, ThreadId child ) +{ + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__pre_thread_ll_create(p=%d, c=%d)\n", + (Int)parent, (Int)child ); + + if (parent != VG_INVALID_THREADID) { + Thread* thr_p; + Thread* thr_c; + Thr* hbthr_p; + Thr* hbthr_c; + + tl_assert(HG_(is_sane_ThreadId)(parent)); + tl_assert(HG_(is_sane_ThreadId)(child)); + tl_assert(parent != child); + + thr_p = map_threads_maybe_lookup( parent ); + thr_c = map_threads_maybe_lookup( child ); + + tl_assert(thr_p != NULL); + tl_assert(thr_c == NULL); + + hbthr_p = thr_p->hbthr; + tl_assert(hbthr_p != NULL); + tl_assert( libhb_get_Thr_opaque(hbthr_p) == thr_p ); + + hbthr_c = libhb_create ( hbthr_p ); + + /* Create a new thread record for the child. */ + /* a Thread for the new thread ... */ + thr_c = mk_Thread( hbthr_c ); + tl_assert( libhb_get_Thr_opaque(hbthr_c) == NULL ); + libhb_set_Thr_opaque(hbthr_c, thr_c); + + /* and bind it in the thread-map table */ + map_threads[child] = thr_c; + tl_assert(thr_c->coretid == VG_INVALID_THREADID); + thr_c->coretid = child; + + /* Record where the parent is so we can later refer to this in + error messages. + + On amd64-linux, this entails a nasty glibc-2.5 specific hack. + The stack snapshot is taken immediately after the parent has + returned from its sys_clone call. Unfortunately there is no + unwind info for the insn following "syscall" - reading the + glibc sources confirms this. 
So we ask for a snapshot to be + taken as if RIP was 3 bytes earlier, in a place where there + is unwind info. Sigh. + */ + { Word first_ip_delta = 0; +# if defined(VGP_amd64_linux) + first_ip_delta = -3; +# endif + thr_c->created_at = VG_(record_ExeContext)(parent, first_ip_delta); + } + } + + if (HG_(clo_sanity_flags) & SCE_THREADS) + all__sanity_check("evh__pre_thread_create-post"); +} + +static +void evh__pre_thread_ll_exit ( ThreadId quit_tid ) +{ + Int nHeld; + Thread* thr_q; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__pre_thread_ll_exit(thr=%d)\n", + (Int)quit_tid ); + + /* quit_tid has disappeared without joining to any other thread. + Therefore there is no synchronisation event associated with its + exit and so we have to pretty much treat it as if it was still + alive but mysteriously making no progress. That is because, if + we don't know when it really exited, then we can never say there + is a point in time when we're sure the thread really has + finished, and so we need to consider the possibility that it + lingers indefinitely and continues to interact with other + threads. */ + /* However, it might have rendezvous'd with a thread that called + pthread_join with this one as arg, prior to this point (that's + how NPTL works). In which case there has already been a prior + sync event. So in any case, just let the thread exit. On NPTL, + all thread exits go through here. */ + tl_assert(HG_(is_sane_ThreadId)(quit_tid)); + thr_q = map_threads_maybe_lookup( quit_tid ); + tl_assert(thr_q != NULL); + + /* Complain if this thread holds any locks. */ + nHeld = HG_(cardinalityWS)( univ_lsets, thr_q->locksetA ); + tl_assert(nHeld >= 0); + if (nHeld > 0) { + HChar buf[80]; + VG_(sprintf)(buf, "Exiting thread still holds %d lock%s", + nHeld, nHeld > 1 ? "s" : ""); + HG_(record_error_Misc)( thr_q, buf ); + } + + /* About the only thing we do need to do is clear the map_threads + entry, in order that the Valgrind core can re-use it. */ + tl_assert(thr_q->coretid == quit_tid); + thr_q->coretid = VG_INVALID_THREADID; + map_threads_delete( quit_tid ); + + if (HG_(clo_sanity_flags) & SCE_THREADS) + all__sanity_check("evh__pre_thread_ll_exit-post"); +} + + +static +void evh__HG_PTHREAD_JOIN_POST ( ThreadId stay_tid, Thread* quit_thr ) +{ + Thread* thr_s; + Thread* thr_q; + Thr* hbthr_s; + Thr* hbthr_q; + SO* so; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__post_thread_join(stayer=%d, quitter=%p)\n", + (Int)stay_tid, quit_thr ); + + tl_assert(HG_(is_sane_ThreadId)(stay_tid)); + + thr_s = map_threads_maybe_lookup( stay_tid ); + thr_q = quit_thr; + tl_assert(thr_s != NULL); + tl_assert(thr_q != NULL); + tl_assert(thr_s != thr_q); + + hbthr_s = thr_s->hbthr; + hbthr_q = thr_q->hbthr; + tl_assert(hbthr_s != hbthr_q); + tl_assert( libhb_get_Thr_opaque(hbthr_s) == thr_s ); + tl_assert( libhb_get_Thr_opaque(hbthr_q) == thr_q ); + + /* Allocate a temporary synchronisation object and use it to send + an imaginary message from the quitter to the stayer, the purpose + being to generate a dependence from the quitter to the + stayer. */ + so = libhb_so_alloc(); + tl_assert(so); + libhb_so_send(hbthr_q, so, True/*strong_send*/); + libhb_so_recv(hbthr_s, so, True/*strong_recv*/); + libhb_so_dealloc(so); + + /* evh__pre_thread_ll_exit issues an error message if the exiting + thread holds any locks. No need to check here. */ + + /* This holds because, at least when using NPTL as the thread + library, we should be notified the low level thread exit before + we hear of any join event on it. 
The low level exit + notification feeds through into evh__pre_thread_ll_exit, + which should clear the map_threads entry for it. Hence we + expect there to be no map_threads entry at this point. */ + tl_assert( map_threads_maybe_reverse_lookup_SLOW(thr_q) + == VG_INVALID_THREADID); + + if (HG_(clo_sanity_flags) & SCE_THREADS) + all__sanity_check("evh__post_thread_join-post"); +} + +static +void evh__pre_mem_read ( CorePart part, ThreadId tid, Char* s, + Addr a, SizeT size) { + if (SHOW_EVENTS >= 2 + || (SHOW_EVENTS >= 1 && size != 1)) + VG_(printf)("evh__pre_mem_read(ctid=%d, \"%s\", %p, %lu)\n", + (Int)tid, s, (void*)a, size ); + shadow_mem_read_range( map_threads_lookup(tid), a, size); + if (size >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__pre_mem_read-post"); +} + +static +void evh__pre_mem_read_asciiz ( CorePart part, ThreadId tid, + Char* s, Addr a ) { + Int len; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__pre_mem_asciiz(ctid=%d, \"%s\", %p)\n", + (Int)tid, s, (void*)a ); + // FIXME: think of a less ugly hack + len = VG_(strlen)( (Char*) a ); + shadow_mem_read_range( map_threads_lookup(tid), a, len+1 ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__pre_mem_read_asciiz-post"); +} + +static +void evh__pre_mem_write ( CorePart part, ThreadId tid, Char* s, + Addr a, SizeT size ) { + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__pre_mem_write(ctid=%d, \"%s\", %p, %lu)\n", + (Int)tid, s, (void*)a, size ); + shadow_mem_write_range( map_threads_lookup(tid), a, size); + if (size >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__pre_mem_write-post"); +} + +static +void evh__new_mem_heap ( Addr a, SizeT len, Bool is_inited ) { + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__new_mem_heap(%p, %lu, inited=%d)\n", + (void*)a, len, (Int)is_inited ); + // FIXME: this is kinda stupid + if (is_inited) { + shadow_mem_make_New(get_current_Thread(), a, len); + } else { + shadow_mem_make_New(get_current_Thread(), a, len); + } + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__pre_mem_read-post"); +} + +static +void evh__die_mem_heap ( Addr a, SizeT len ) { + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__die_mem_heap(%p, %lu)\n", (void*)a, len ); + shadow_mem_make_NoAccess( get_current_Thread(), a, len ); + if (len >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) + all__sanity_check("evh__pre_mem_read-post"); +} + +static VG_REGPARM(1) +void evh__mem_help_read_1(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_READ_1(hbthr, a); +} + +static VG_REGPARM(1) +void evh__mem_help_read_2(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_READ_2(hbthr, a); +} + +static VG_REGPARM(1) +void evh__mem_help_read_4(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_READ_4(hbthr, a); +} + +static VG_REGPARM(1) +void evh__mem_help_read_8(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_READ_8(hbthr, a); +} + +static VG_REGPARM(2) +void evh__mem_help_read_N(Addr a, SizeT size) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_READ_N(hbthr, a, size); +} + +static VG_REGPARM(1) +void evh__mem_help_write_1(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_WRITE_1(hbthr, a); +} + +static VG_REGPARM(1) +void 
evh__mem_help_write_2(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_WRITE_2(hbthr, a); +} + +static VG_REGPARM(1) +void evh__mem_help_write_4(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_WRITE_4(hbthr, a); +} + +static VG_REGPARM(1) +void evh__mem_help_write_8(Addr a) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_WRITE_8(hbthr, a); +} + +static VG_REGPARM(2) +void evh__mem_help_write_N(Addr a, SizeT size) { + Thread* thr = get_current_Thread_in_C_C(); + Thr* hbthr = thr->hbthr; + LIBHB_WRITE_N(hbthr, a, size); +} + +//static void evh__bus_lock(void) { +// Thread* thr; +// if (0) VG_(printf)("evh__bus_lock()\n"); +// thr = get_current_Thread(); +// tl_assert(thr); /* cannot fail - Thread* must already exist */ +// evhH__post_thread_w_acquires_lock( thr, LK_nonRec, (Addr)&__bus_lock ); +//} +//static void evh__bus_unlock(void) { +// Thread* thr; +// if (0) VG_(printf)("evh__bus_unlock()\n"); +// thr = get_current_Thread(); +// tl_assert(thr); /* cannot fail - Thread* must already exist */ +// evhH__pre_thread_releases_lock( thr, (Addr)&__bus_lock, False/*!isRDWR*/ ); +//} + +/* ------------------------------------------------------- */ +/* -------------- events to do with mutexes -------------- */ +/* ------------------------------------------------------- */ + +/* EXPOSITION only: by intercepting lock init events we can show the + user where the lock was initialised, rather than only being able to + show where it was first locked. Intercepting lock initialisations + is not necessary for the basic operation of the race checker. */ +static +void evh__HG_PTHREAD_MUTEX_INIT_POST( ThreadId tid, + void* mutex, Word mbRec ) +{ + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_MUTEX_INIT_POST(ctid=%d, mbRec=%ld, %p)\n", + (Int)tid, mbRec, (void*)mutex ); + tl_assert(mbRec == 0 || mbRec == 1); + map_locks_lookup_or_create( mbRec ? 
LK_mbRec : LK_nonRec, + (Addr)mutex, tid ); + if (HG_(clo_sanity_flags) & SCE_LOCKS) + all__sanity_check("evh__hg_PTHREAD_MUTEX_INIT_POST"); +} + +static +void evh__HG_PTHREAD_MUTEX_DESTROY_PRE( ThreadId tid, void* mutex ) +{ + Thread* thr; + Lock* lk; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_MUTEX_DESTROY_PRE(ctid=%d, %p)\n", + (Int)tid, (void*)mutex ); + + thr = map_threads_maybe_lookup( tid ); + /* cannot fail - Thread* must already exist */ + tl_assert( HG_(is_sane_Thread)(thr) ); + + lk = map_locks_maybe_lookup( (Addr)mutex ); + + if (lk == NULL || (lk->kind != LK_nonRec && lk->kind != LK_mbRec)) { + HG_(record_error_Misc)( + thr, "pthread_mutex_destroy with invalid argument" ); + } + + if (lk) { + tl_assert( HG_(is_sane_LockN)(lk) ); + tl_assert( lk->guestaddr == (Addr)mutex ); + if (lk->heldBy) { + /* Basically act like we unlocked the lock */ + HG_(record_error_Misc)( + thr, "pthread_mutex_destroy of a locked mutex" ); + /* remove lock from locksets of all owning threads */ + remove_Lock_from_locksets_of_all_owning_Threads( lk ); + VG_(deleteBag)( lk->heldBy ); + lk->heldBy = NULL; + lk->heldW = False; + lk->acquired_at = NULL; + } + tl_assert( !lk->heldBy ); + tl_assert( HG_(is_sane_LockN)(lk) ); + + laog__handle_one_lock_deletion(lk); + map_locks_delete( lk->guestaddr ); + del_LockN( lk ); + } + + if (HG_(clo_sanity_flags) & SCE_LOCKS) + all__sanity_check("evh__hg_PTHREAD_MUTEX_DESTROY_PRE"); +} + +static void evh__HG_PTHREAD_MUTEX_LOCK_PRE ( ThreadId tid, + void* mutex, Word isTryLock ) +{ + /* Just check the mutex is sane; nothing else to do. */ + // 'mutex' may be invalid - not checked by wrapper + Thread* thr; + Lock* lk; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_MUTEX_LOCK_PRE(ctid=%d, mutex=%p)\n", + (Int)tid, (void*)mutex ); + + tl_assert(isTryLock == 0 || isTryLock == 1); + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + lk = map_locks_maybe_lookup( (Addr)mutex ); + + if (lk && (lk->kind == LK_rdwr)) { + HG_(record_error_Misc)( thr, "pthread_mutex_lock with a " + "pthread_rwlock_t* argument " ); + } + + if ( lk + && isTryLock == 0 + && (lk->kind == LK_nonRec || lk->kind == LK_rdwr) + && lk->heldBy + && lk->heldW + && VG_(elemBag)( lk->heldBy, (Word)thr ) > 0 ) { + /* uh, it's a non-recursive lock and we already w-hold it, and + this is a real lock operation (not a speculative "tryLock" + kind of thing). Duh. Deadlock coming up; but at least + produce an error message. 
*/ + HG_(record_error_Misc)( thr, "Attempt to re-lock a " + "non-recursive lock I already hold" ); + } +} + +static void evh__HG_PTHREAD_MUTEX_LOCK_POST ( ThreadId tid, void* mutex ) +{ + // only called if the real library call succeeded - so mutex is sane + Thread* thr; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_MUTEX_LOCK_POST(ctid=%d, mutex=%p)\n", + (Int)tid, (void*)mutex ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + evhH__post_thread_w_acquires_lock( + thr, + LK_mbRec, /* if not known, create new lock with this LockKind */ + (Addr)mutex + ); +} + +static void evh__HG_PTHREAD_MUTEX_UNLOCK_PRE ( ThreadId tid, void* mutex ) +{ + // 'mutex' may be invalid - not checked by wrapper + Thread* thr; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_MUTEX_UNLOCK_PRE(ctid=%d, mutex=%p)\n", + (Int)tid, (void*)mutex ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + evhH__pre_thread_releases_lock( thr, (Addr)mutex, False/*!isRDWR*/ ); +} + +static void evh__HG_PTHREAD_MUTEX_UNLOCK_POST ( ThreadId tid, void* mutex ) +{ + // only called if the real library call succeeded - so mutex is sane + Thread* thr; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_MUTEX_UNLOCK_POST(ctid=%d, mutex=%p)\n", + (Int)tid, (void*)mutex ); + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + // anything we should do here? +} + + +/* ----------------------------------------------------- */ +/* --------------- events to do with CVs --------------- */ +/* ----------------------------------------------------- */ + +/* A mapping from CV to the SO associated with it. When the CV is + signalled/broadcasted upon, we do a 'send' into the SO, and when a + wait on it completes, we do a 'recv' from the SO. This is believed + to give the correct happens-before events arising from CV + signallings/broadcasts. +*/ + +/* pthread_mutex_cond* -> SO* */ +static WordFM* map_cond_to_SO = NULL; + +static void map_cond_to_SO_INIT ( void ) { + if (UNLIKELY(map_cond_to_SO == NULL)) { + map_cond_to_SO = VG_(newFM)( HG_(zalloc), + "hg.mctSI.1", HG_(free), NULL ); + tl_assert(map_cond_to_SO != NULL); + } +} + +static SO* map_cond_to_SO_lookup_or_alloc ( void* cond ) { + UWord key, val; + map_cond_to_SO_INIT(); + if (VG_(lookupFM)( map_cond_to_SO, &key, &val, (UWord)cond )) { + tl_assert(key == (UWord)cond); + return (SO*)val; + } else { + SO* so = libhb_so_alloc(); + VG_(addToFM)( map_cond_to_SO, (UWord)cond, (UWord)so ); + return so; + } +} + +static void map_cond_to_SO_delete ( void* cond ) { + UWord keyW, valW; + map_cond_to_SO_INIT(); + if (VG_(delFromFM)( map_cond_to_SO, &keyW, &valW, (UWord)cond )) { + SO* so = (SO*)valW; + tl_assert(keyW == (UWord)cond); + libhb_so_dealloc(so); + } +} + +static void evh__HG_PTHREAD_COND_SIGNAL_PRE ( ThreadId tid, void* cond ) +{ + /* 'tid' has signalled on 'cond'. As per the comment above, bind + cond to a SO if it is not already so bound, and 'send' on the + SO. This is later used by other thread(s) which successfully + exit from a pthread_cond_wait on the same cv; then they 'recv' + from the SO, thereby acquiring a dependency on this signalling + event. 
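+      For illustration only (ordinary pthread calls; 'mx', 'cv', 'data' and
+      the thread names are made up): if thread T1 does
+         pthread_mutex_lock(&mx); data = 1;
+         pthread_cond_signal(&cv);                     // 'send' on cv's SO
+         pthread_mutex_unlock(&mx);
+      and thread T2 does
+         pthread_mutex_lock(&mx);
+         while (!data) pthread_cond_wait(&cv, &mx);    // 'recv' when it returns
+         pthread_mutex_unlock(&mx);
+      then by the time T2's wait returns, everything T1 did before
+      signalling (including the write of 'data') happens-before T2's
+      subsequent accesses.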
*/ + Thread* thr; + SO* so; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_COND_SIGNAL_PRE(ctid=%d, cond=%p)\n", + (Int)tid, (void*)cond ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + // error-if: mutex is bogus + // error-if: mutex is not locked + + so = map_cond_to_SO_lookup_or_alloc( cond ); + tl_assert(so); + + libhb_so_send( thr->hbthr, so, True/*strong_send*/ ); +} + +/* returns True if it reckons 'mutex' is valid and held by this + thread, else False */ +static Bool evh__HG_PTHREAD_COND_WAIT_PRE ( ThreadId tid, + void* cond, void* mutex ) +{ + Thread* thr; + Lock* lk; + Bool lk_valid = True; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_COND_WAIT_PRE" + "(ctid=%d, cond=%p, mutex=%p)\n", + (Int)tid, (void*)cond, (void*)mutex ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + lk = map_locks_maybe_lookup( (Addr)mutex ); + + /* Check for stupid mutex arguments. There are various ways to be + a bozo. Only complain once, though, even if more than one thing + is wrong. */ + if (lk == NULL) { + lk_valid = False; + HG_(record_error_Misc)( + thr, + "pthread_cond_{timed}wait called with invalid mutex" ); + } else { + tl_assert( HG_(is_sane_LockN)(lk) ); + if (lk->kind == LK_rdwr) { + lk_valid = False; + HG_(record_error_Misc)( + thr, "pthread_cond_{timed}wait called with mutex " + "of type pthread_rwlock_t*" ); + } else + if (lk->heldBy == NULL) { + lk_valid = False; + HG_(record_error_Misc)( + thr, "pthread_cond_{timed}wait called with un-held mutex"); + } else + if (lk->heldBy != NULL + && VG_(elemBag)( lk->heldBy, (Word)thr ) == 0) { + lk_valid = False; + HG_(record_error_Misc)( + thr, "pthread_cond_{timed}wait called with mutex " + "held by a different thread" ); + } + } + + // error-if: cond is also associated with a different mutex + + return lk_valid; +} + +static void evh__HG_PTHREAD_COND_WAIT_POST ( ThreadId tid, + void* cond, void* mutex ) +{ + /* A pthread_cond_wait(cond, mutex) completed successfully. Find + the SO for this cond, and 'recv' from it so as to acquire a + dependency edge back to the signaller/broadcaster. */ + Thread* thr; + SO* so; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_COND_WAIT_POST" + "(ctid=%d, cond=%p, mutex=%p)\n", + (Int)tid, (void*)cond, (void*)mutex ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + // error-if: cond is also associated with a different mutex + + so = map_cond_to_SO_lookup_or_alloc( cond ); + tl_assert(so); + + if (!libhb_so_everSent(so)) { + /* Hmm. How can a wait on 'cond' succeed if nobody signalled + it? If this happened it would surely be a bug in the threads + library. Or one of those fabled "spurious wakeups". */ + HG_(record_error_Misc)( thr, "Bug in libpthread: pthread_cond_wait " + "succeeded on" + " without prior pthread_cond_post"); + } + + /* anyway, acquire a dependency on it. */ + libhb_so_recv( thr->hbthr, so, True/*strong_recv*/ ); +} + +static void evh__HG_PTHREAD_COND_DESTROY_PRE ( ThreadId tid, + void* cond ) +{ + /* Deal with destroy events. The only purpose is to free storage + associated with the CV, so as to avoid any possible resource + leaks. 
*/ + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_COND_DESTROY_PRE" + "(ctid=%d, cond=%p)\n", + (Int)tid, (void*)cond ); + + map_cond_to_SO_delete( cond ); +} + + +/* ------------------------------------------------------- */ +/* -------------- events to do with rwlocks -------------- */ +/* ------------------------------------------------------- */ + +/* EXPOSITION only */ +static +void evh__HG_PTHREAD_RWLOCK_INIT_POST( ThreadId tid, void* rwl ) +{ + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_RWLOCK_INIT_POST(ctid=%d, %p)\n", + (Int)tid, (void*)rwl ); + map_locks_lookup_or_create( LK_rdwr, (Addr)rwl, tid ); + if (HG_(clo_sanity_flags) & SCE_LOCKS) + all__sanity_check("evh__hg_PTHREAD_RWLOCK_INIT_POST"); +} + +static +void evh__HG_PTHREAD_RWLOCK_DESTROY_PRE( ThreadId tid, void* rwl ) +{ + Thread* thr; + Lock* lk; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_RWLOCK_DESTROY_PRE(ctid=%d, %p)\n", + (Int)tid, (void*)rwl ); + + thr = map_threads_maybe_lookup( tid ); + /* cannot fail - Thread* must already exist */ + tl_assert( HG_(is_sane_Thread)(thr) ); + + lk = map_locks_maybe_lookup( (Addr)rwl ); + + if (lk == NULL || lk->kind != LK_rdwr) { + HG_(record_error_Misc)( + thr, "pthread_rwlock_destroy with invalid argument" ); + } + + if (lk) { + tl_assert( HG_(is_sane_LockN)(lk) ); + tl_assert( lk->guestaddr == (Addr)rwl ); + if (lk->heldBy) { + /* Basically act like we unlocked the lock */ + HG_(record_error_Misc)( + thr, "pthread_rwlock_destroy of a locked mutex" ); + /* remove lock from locksets of all owning threads */ + remove_Lock_from_locksets_of_all_owning_Threads( lk ); + VG_(deleteBag)( lk->heldBy ); + lk->heldBy = NULL; + lk->heldW = False; + lk->acquired_at = NULL; + } + tl_assert( !lk->heldBy ); + tl_assert( HG_(is_sane_LockN)(lk) ); + + laog__handle_one_lock_deletion(lk); + map_locks_delete( lk->guestaddr ); + del_LockN( lk ); + } + + if (HG_(clo_sanity_flags) & SCE_LOCKS) + all__sanity_check("evh__hg_PTHREAD_RWLOCK_DESTROY_PRE"); +} + +static +void evh__HG_PTHREAD_RWLOCK_LOCK_PRE ( ThreadId tid, + void* rwl, + Word isW, Word isTryLock ) +{ + /* Just check the rwl is sane; nothing else to do. */ + // 'rwl' may be invalid - not checked by wrapper + Thread* thr; + Lock* lk; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_RWLOCK_LOCK_PRE(ctid=%d, isW=%d, %p)\n", + (Int)tid, (Int)isW, (void*)rwl ); + + tl_assert(isW == 0 || isW == 1); /* assured us by wrapper */ + tl_assert(isTryLock == 0 || isTryLock == 1); /* assured us by wrapper */ + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + lk = map_locks_maybe_lookup( (Addr)rwl ); + if ( lk + && (lk->kind == LK_nonRec || lk->kind == LK_mbRec) ) { + /* Wrong kind of lock. Duh. */ + HG_(record_error_Misc)( + thr, "pthread_rwlock_{rd,rw}lock with a " + "pthread_mutex_t* argument " ); + } +} + +static +void evh__HG_PTHREAD_RWLOCK_LOCK_POST ( ThreadId tid, void* rwl, Word isW ) +{ + // only called if the real library call succeeded - so mutex is sane + Thread* thr; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_RWLOCK_LOCK_POST(ctid=%d, isW=%d, %p)\n", + (Int)tid, (Int)isW, (void*)rwl ); + + tl_assert(isW == 0 || isW == 1); /* assured us by wrapper */ + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + (isW ? 
evhH__post_thread_w_acquires_lock + : evhH__post_thread_r_acquires_lock)( + thr, + LK_rdwr, /* if not known, create new lock with this LockKind */ + (Addr)rwl + ); +} + +static void evh__HG_PTHREAD_RWLOCK_UNLOCK_PRE ( ThreadId tid, void* rwl ) +{ + // 'rwl' may be invalid - not checked by wrapper + Thread* thr; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_RWLOCK_UNLOCK_PRE(ctid=%d, rwl=%p)\n", + (Int)tid, (void*)rwl ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + evhH__pre_thread_releases_lock( thr, (Addr)rwl, True/*isRDWR*/ ); +} + +static void evh__HG_PTHREAD_RWLOCK_UNLOCK_POST ( ThreadId tid, void* rwl ) +{ + // only called if the real library call succeeded - so mutex is sane + Thread* thr; + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__hg_PTHREAD_RWLOCK_UNLOCK_POST(ctid=%d, rwl=%p)\n", + (Int)tid, (void*)rwl ); + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + // anything we should do here? +} + + +/* ---------------------------------------------------------- */ +/* -------------- events to do with semaphores -------------- */ +/* ---------------------------------------------------------- */ + +/* This is similar to but not identical to the handling for condition + variables. */ + +/* For each semaphore, we maintain a stack of SOs. When a 'post' + operation is done on a semaphore (unlocking, essentially), a new SO + is created for the posting thread, the posting thread does a strong + send to it (which merely installs the posting thread's VC in the + SO), and the SO is pushed on the semaphore's stack. + + Later, when a (probably different) thread completes 'wait' on the + semaphore, we pop a SO off the semaphore's stack (which should be + nonempty), and do a strong recv from it. This mechanism creates + dependencies between posters and waiters of the semaphore. + + It may not be necessary to use a stack - perhaps a bag of SOs would + do. But we do need to keep track of how many unused-up posts have + happened for the semaphore. + + Imagine T1 and T2 both post once on a semaphore S, and T3 waits + twice on S. T3 cannot complete its waits without both T1 and T2 + posting. The above mechanism will ensure that T3 acquires + dependencies on both T1 and T2. + + When a semaphore is initialised with value N, we act as if we'd + posted N times on the semaphore: basically create N SOs and do a + strong send to all of them. This allows up to N waits on the + semaphore to acquire a dependency on the initialisation point, + which AFAICS is the correct behaviour. + + We don't emit an error for DESTROY_PRE on a semaphore we don't know + about. We should.
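+   For illustration only, the simplest client-side case (plain POSIX
+   calls; 's', 'buf' and the helper names are made up):
+
+      sem_t s;
+      sem_init(&s, 0, 0);                 // initial value 0: no SOs pushed
+      // T1:  buf = make_data();  sem_post(&s);   -> new SO, strong send, push
+      // T2:  sem_wait(&s);       use(buf);       -> pop that SO, strong recv
+
+   The recv in T2 acquires a dependency on T1's post, so T1's write to
+   'buf' happens-before T2's read of it and is not flagged as a race.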
+*/ + +/* sem_t* -> XArray* SO* */ +static WordFM* map_sem_to_SO_stack = NULL; + +static void map_sem_to_SO_stack_INIT ( void ) { + if (map_sem_to_SO_stack == NULL) { + map_sem_to_SO_stack = VG_(newFM)( HG_(zalloc), "hg.mstSs.1", + HG_(free), NULL ); + tl_assert(map_sem_to_SO_stack != NULL); + } +} + +static void push_SO_for_sem ( void* sem, SO* so ) { + UWord keyW; + XArray* xa; + tl_assert(so); + map_sem_to_SO_stack_INIT(); + if (VG_(lookupFM)( map_sem_to_SO_stack, + &keyW, (UWord*)&xa, (UWord)sem )) { + tl_assert(keyW == (UWord)sem); + tl_assert(xa); + VG_(addToXA)( xa, &so ); + } else { + xa = VG_(newXA)( HG_(zalloc), "hg.pSfs.1", HG_(free), sizeof(SO*) ); + VG_(addToXA)( xa, &so ); + VG_(addToFM)( map_sem_to_SO_stack, (Word)sem, (Word)xa ); + } +} + +static SO* mb_pop_SO_for_sem ( void* sem ) { + UWord keyW; + XArray* xa; + SO* so; + map_sem_to_SO_stack_INIT(); + if (VG_(lookupFM)( map_sem_to_SO_stack, + &keyW, (UWord*)&xa, (UWord)sem )) { + /* xa is the stack for this semaphore. */ + Word sz; + tl_assert(keyW == (UWord)sem); + sz = VG_(sizeXA)( xa ); + tl_assert(sz >= 0); + if (sz == 0) + return NULL; /* odd, the stack is empty */ + so = *(SO**)VG_(indexXA)( xa, sz-1 ); + tl_assert(so); + VG_(dropTailXA)( xa, 1 ); + return so; + } else { + /* hmm, that's odd. No stack for this semaphore. */ + return NULL; + } +} + +static void evh__HG_POSIX_SEM_DESTROY_PRE ( ThreadId tid, void* sem ) +{ + UWord keyW, valW; + SO* so; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_POSIX_SEM_DESTROY_PRE(ctid=%d, sem=%p)\n", + (Int)tid, (void*)sem ); + + map_sem_to_SO_stack_INIT(); + + /* Empty out the semaphore's SO stack. This way of doing it is + stupid, but at least it's easy. */ + while (1) { + so = mb_pop_SO_for_sem( sem ); + if (!so) break; + libhb_so_dealloc(so); + } + + if (VG_(delFromFM)( map_sem_to_SO_stack, &keyW, &valW, (UWord)sem )) { + XArray* xa = (XArray*)valW; + tl_assert(keyW == (UWord)sem); + tl_assert(xa); + tl_assert(VG_(sizeXA)(xa) == 0); /* preceding loop just emptied it */ + VG_(deleteXA)(xa); + } +} + +static +void evh__HG_POSIX_SEM_INIT_POST ( ThreadId tid, void* sem, UWord value ) +{ + SO* so; + Thread* thr; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_POSIX_SEM_INIT_POST(ctid=%d, sem=%p, value=%lu)\n", + (Int)tid, (void*)sem, value ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + /* Empty out the semaphore's SO stack. This way of doing it is + stupid, but at least it's easy. */ + while (1) { + so = mb_pop_SO_for_sem( sem ); + if (!so) break; + libhb_so_dealloc(so); + } + + /* If we don't do this check, the following while loop runs us out + of memory for stupid initial values of 'value'. */ + if (value > 10000) { + HG_(record_error_Misc)( + thr, "sem_init: initial value exceeds 10000; using 10000" ); + value = 10000; + } + + /* Now create 'valid' new SOs for the thread, do a strong send to + each of them, and push them all on the stack. */ + for (; value > 0; value--) { + Thr* hbthr = thr->hbthr; + tl_assert(hbthr); + + so = libhb_so_alloc(); + libhb_so_send( hbthr, so, True/*strong send*/ ); + push_SO_for_sem( sem, so ); + } +} + +static void evh__HG_POSIX_SEM_POST_PRE ( ThreadId tid, void* sem ) +{ + /* 'tid' has posted on 'sem'. Create a new SO, do a strong send to + it (iow, write our VC into it, then tick ours), and push the SO + on on a stack of SOs associated with 'sem'. 
This is later used + by other thread(s) which successfully exit from a sem_wait on + the same sem; by doing a strong recv from SOs popped of the + stack, they acquire dependencies on the posting thread + segment(s). */ + + Thread* thr; + SO* so; + Thr* hbthr; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_POSIX_SEM_POST_PRE(ctid=%d, sem=%p)\n", + (Int)tid, (void*)sem ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + // error-if: sem is bogus + + hbthr = thr->hbthr; + tl_assert(hbthr); + + so = libhb_so_alloc(); + libhb_so_send( hbthr, so, True/*strong send*/ ); + push_SO_for_sem( sem, so ); +} + +static void evh__HG_POSIX_SEM_WAIT_POST ( ThreadId tid, void* sem ) +{ + /* A sem_wait(sem) completed successfully. Pop the posting-SO for + the 'sem' from this semaphore's SO-stack, and do a strong recv + from it. This creates a dependency back to one of the post-ers + for the semaphore. */ + + Thread* thr; + SO* so; + Thr* hbthr; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_POSIX_SEM_WAIT_POST(ctid=%d, sem=%p)\n", + (Int)tid, (void*)sem ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + // error-if: sem is bogus + + so = mb_pop_SO_for_sem( sem ); + + if (so) { + hbthr = thr->hbthr; + tl_assert(hbthr); + + libhb_so_recv( hbthr, so, True/*strong recv*/ ); + libhb_so_dealloc(so); + } else { + /* Hmm. How can a wait on 'sem' succeed if nobody posted to it? + If this happened it would surely be a bug in the threads + library. */ + HG_(record_error_Misc)( + thr, "Bug in libpthread: sem_wait succeeded on" + " semaphore without prior sem_post"); + } +} + + +/* -------------------------------------------------------- */ +/* -------------- events to do with barriers -------------- */ +/* -------------------------------------------------------- */ + +typedef + struct { + Bool initted; /* has it yet been initted by guest? */ + UWord size; /* declared size */ + XArray* waiting; /* XA of Thread*. # present is 0 .. .size */ + } + Bar; + +static Bar* new_Bar ( void ) { + Bar* bar = HG_(zalloc)( "hg.nB.1 (new_Bar)", sizeof(Bar) ); + tl_assert(bar); + /* all fields are zero */ + tl_assert(bar->initted == False); + return bar; +} + +static void delete_Bar ( Bar* bar ) { + tl_assert(bar); + if (bar->waiting) + VG_(deleteXA)(bar->waiting); + HG_(free)(bar); +} + +/* A mapping which stores auxiliary data for barriers. 
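+   For illustration only, how these fields track a typical client
+   sequence ('b' and the count are made up):
+
+      pthread_barrier_t b;
+      pthread_barrier_init(&b, NULL, 3);  // initted=True, size=3, waiting empty
+      pthread_barrier_wait(&b);           // each arriving thread is appended to
+                                          // 'waiting'; the 3rd arrival joins the
+                                          // VCs and empties 'waiting' again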
*/ + +/* pthread_barrier_t* -> Bar* */ +static WordFM* map_barrier_to_Bar = NULL; + +static void map_barrier_to_Bar_INIT ( void ) { + if (UNLIKELY(map_barrier_to_Bar == NULL)) { + map_barrier_to_Bar = VG_(newFM)( HG_(zalloc), + "hg.mbtBI.1", HG_(free), NULL ); + tl_assert(map_barrier_to_Bar != NULL); + } +} + +static Bar* map_barrier_to_Bar_lookup_or_alloc ( void* barrier ) { + UWord key, val; + map_barrier_to_Bar_INIT(); + if (VG_(lookupFM)( map_barrier_to_Bar, &key, &val, (UWord)barrier )) { + tl_assert(key == (UWord)barrier); + return (Bar*)val; + } else { + Bar* bar = new_Bar(); + VG_(addToFM)( map_barrier_to_Bar, (UWord)barrier, (UWord)bar ); + return bar; + } +} + +static void map_barrier_to_Bar_delete ( void* barrier ) { + UWord keyW, valW; + map_barrier_to_Bar_INIT(); + if (VG_(delFromFM)( map_barrier_to_Bar, &keyW, &valW, (UWord)barrier )) { + Bar* bar = (Bar*)valW; + tl_assert(keyW == (UWord)barrier); + delete_Bar(bar); + } +} + + +static void evh__HG_PTHREAD_BARRIER_INIT_PRE ( ThreadId tid, + void* barrier, + UWord count ) +{ + Thread* thr; + Bar* bar; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_BARRIER_INIT_PRE" + "(tid=%d, barrier=%p, count=%lu)\n", + (Int)tid, (void*)barrier, count ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + if (count == 0) { + HG_(record_error_Misc)( + thr, "pthread_barrier_init: 'count' argument is zero" + ); + } + + bar = map_barrier_to_Bar_lookup_or_alloc(barrier); + tl_assert(bar); + + if (bar->initted) { + HG_(record_error_Misc)( + thr, "pthread_barrier_init: barrier is already initialised" + ); + } + + if (bar->waiting && VG_(sizeXA)(bar->waiting) > 0) { + tl_assert(bar->initted); + HG_(record_error_Misc)( + thr, "pthread_barrier_init: threads are waiting at barrier" + ); + VG_(dropTailXA)(bar->waiting, VG_(sizeXA)(bar->waiting)); + } + if (!bar->waiting) { + bar->waiting = VG_(newXA)( HG_(zalloc), "hg.eHPBIP.1", HG_(free), + sizeof(Thread*) ); + } + + tl_assert(bar->waiting); + tl_assert(VG_(sizeXA)(bar->waiting) == 0); + bar->initted = True; + bar->size = count; +} + + +static void evh__HG_PTHREAD_BARRIER_DESTROY_PRE ( ThreadId tid, + void* barrier ) +{ + Thread* thr; + Bar* bar; + + /* Deal with destroy events. The only purpose is to free storage + associated with the barrier, so as to avoid any possible + resource leaks. */ + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_BARRIER_DESTROY_PRE" + "(tid=%d, barrier=%p)\n", + (Int)tid, (void*)barrier ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + bar = map_barrier_to_Bar_lookup_or_alloc(barrier); + tl_assert(bar); + + if (!bar->initted) { + HG_(record_error_Misc)( + thr, "pthread_barrier_destroy: barrier was never initialised" + ); + } + + if (bar->initted && bar->waiting && VG_(sizeXA)(bar->waiting) > 0) { + HG_(record_error_Misc)( + thr, "pthread_barrier_destroy: threads are waiting at barrier" + ); + } + + /* Maybe we shouldn't do this; just let it persist, so that when it + is reinitialised we don't need to do any dynamic memory + allocation? The downside is a potentially unlimited space leak, + if the client creates (in turn) a large number of barriers all + at different locations. Note that if we do later move to the + don't-delete-it scheme, we need to mark the barrier as + uninitialised again since otherwise a later _init call will + elicit a duplicate-init error. 
*/ + map_barrier_to_Bar_delete( barrier ); +} + + +static void evh__HG_PTHREAD_BARRIER_WAIT_PRE ( ThreadId tid, + void* barrier ) +{ + /* This function gets called after a client thread calls + pthread_barrier_wait but before it arrives at the real + pthread_barrier_wait. + + Why is the following correct? It's a bit subtle. + + If this is not the last thread arriving at the barrier, we simply + note its presence and return. Because valgrind (at least as of + Nov 08) is single threaded, we are guaranteed safe from any race + conditions when in this function -- no other client threads are + running. + + If this is the last thread, then we are again the only running + thread. All the other threads will have either arrived at the + real pthread_barrier_wait or are on their way to it, but in any + case are guaranteed not to be able to move past it, because this + thread is currently in this function and so has not yet arrived + at the real pthread_barrier_wait. That means that: + + 1. While we are in this function, none of the other threads + waiting at the barrier can move past it. + + 2. When this function returns (and simulated execution resumes), + this thread and all other waiting threads will be able to move + past the real barrier. + + Because of this, it is now safe to update the vector clocks of + all threads, to represent the fact that they all arrived at the + barrier and have all moved on. There is no danger of any + complications to do with some threads leaving the barrier and + racing back round to the front, whilst others are still leaving + (which is the primary source of complication in correct handling/ + implementation of barriers). That can't happen because we update + here our data structures so as to indicate that the threads have + passed the barrier, even though, as per (2) above, they are + guaranteed not to pass the barrier until we return. + + This relies crucially on Valgrind being single threaded. If that + changes, this will need to be reconsidered. + */ + Thread* thr; + Bar* bar; + SO* so; + UWord present, i; + + if (SHOW_EVENTS >= 1) + VG_(printf)("evh__HG_PTHREAD_BARRIER_WAIT_PRE" + "(tid=%d, barrier=%p)\n", + (Int)tid, (void*)barrier ); + + thr = map_threads_maybe_lookup( tid ); + tl_assert(thr); /* cannot fail - Thread* must already exist */ + + bar = map_barrier_to_Bar_lookup_or_alloc(barrier); + tl_assert(bar); + + if (!bar->initted) { + HG_(record_error_Misc)( + thr, "pthread_barrier_wait: barrier is uninitialised" + ); + return; /* client is broken .. avoid assertions below */ + } + + /* guaranteed by _INIT_PRE above */ + tl_assert(bar->size > 0); + tl_assert(bar->waiting); + + VG_(addToXA)( bar->waiting, &thr ); + + /* guaranteed by this function */ + present = VG_(sizeXA)(bar->waiting); + tl_assert(present > 0 && present <= bar->size); + + if (present < bar->size) + return; + + /* All the threads have arrived. Now do the Interesting Bit. Get + a new synchronisation object and do a weak send to it from all + the participating threads. This makes its vector clocks be the + join of all the individual threads' vector clocks. Then do a + strong receive from it back to all threads, so that their VCs + are a copy of it (hence are all equal to the join of their + original VCs.) */ + so = libhb_so_alloc(); + + /* XXX check ->waiting has no duplicates */ + + tl_assert(bar->waiting); + tl_assert(VG_(sizeXA)(bar->waiting) == bar->size); + + /* compute the join ... 
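+      For illustration only (schematic, ignoring any per-thread clock
+      ticks): with three waiters whose VCs are [3,1,0], [1,4,0] and
+      [0,2,5], the weak sends leave the SO holding the join [3,4,5], and
+      the strong recvs below then make every waiter's VC a copy of
+      [3,4,5].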
*/ + for (i = 0; i < bar->size; i++) { + Thread* t = *(Thread**)VG_(indexXA)(bar->waiting, i); + Thr* hbthr = t->hbthr; + libhb_so_send( hbthr, so, False/*weak send*/ ); + } + /* ... and distribute to all threads */ + for (i = 0; i < bar->size; i++) { + Thread* t = *(Thread**)VG_(indexXA)(bar->waiting, i); + Thr* hbthr = t->hbthr; + libhb_so_recv( hbthr, so, True/*strong recv*/ ); + } + + /* finally, we must empty out the waiting vector */ + VG_(dropTailXA)(bar->waiting, VG_(sizeXA)(bar->waiting)); + + /* and we don't need this any more. Perhaps a stack-allocated + SO would be better? */ + libhb_so_dealloc(so); +} + + +/*--------------------------------------------------------------*/ +/*--- Lock acquisition order monitoring ---*/ +/*--------------------------------------------------------------*/ + +/* FIXME: here are some optimisations still to do in + laog__pre_thread_acquires_lock. + + The graph is structured so that if L1 --*--> L2 then L1 must be + acquired before L2. + + The common case is that some thread T holds (eg) L1 L2 and L3 and + is repeatedly acquiring and releasing Ln, and there is no ordering + error in what it is doing. Hence it repeatly: + + (1) searches laog to see if Ln --*--> {L1,L2,L3}, which always + produces the answer No (because there is no error). + + (2) adds edges {L1,L2,L3} --> Ln to laog, which are already present + (because they already got added the first time T acquired Ln). + + Hence cache these two events: + + (1) Cache result of the query from last time. Invalidate the cache + any time any edges are added to or deleted from laog. + + (2) Cache these add-edge requests and ignore them if said edges + have already been added to laog. Invalidate the cache any time + any edges are deleted from laog. +*/ + +typedef + struct { + WordSetID inns; /* in univ_laog */ + WordSetID outs; /* in univ_laog */ + } + LAOGLinks; + +/* lock order acquisition graph */ +static WordFM* laog = NULL; /* WordFM Lock* LAOGLinks* */ + +/* EXPOSITION ONLY: for each edge in 'laog', record the two places + where that edge was created, so that we can show the user later if + we need to. */ +typedef + struct { + Addr src_ga; /* Lock guest addresses for */ + Addr dst_ga; /* src/dst of the edge */ + ExeContext* src_ec; /* And corresponding places where that */ + ExeContext* dst_ec; /* ordering was established */ + } + LAOGLinkExposition; + +static Word cmp_LAOGLinkExposition ( UWord llx1W, UWord llx2W ) { + /* Compare LAOGLinkExposition*s by (src_ga,dst_ga) field pair. 
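+      For illustration only (client-side; the lock names A and B are made
+      up): if thread T1 does
+         pthread_mutex_lock(&A); pthread_mutex_lock(&B);   // records edge A -> B
+      and later thread T2 does
+         pthread_mutex_lock(&B); pthread_mutex_lock(&A);   // violates A -> B
+      then the stored src_ec/dst_ec for the A -> B edge let the eventual
+      lock-order report show where that ordering was first established.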
*/ + LAOGLinkExposition* llx1 = (LAOGLinkExposition*)llx1W; + LAOGLinkExposition* llx2 = (LAOGLinkExposition*)llx2W; + if (llx1->src_ga < llx2->src_ga) return -1; + if (llx1->src_ga > llx2->src_ga) return 1; + if (llx1->dst_ga < llx2->dst_ga) return -1; + if (llx1->dst_ga > llx2->dst_ga) return 1; + return 0; +} + +static WordFM* laog_exposition = NULL; /* WordFM LAOGLinkExposition* NULL */ +/* end EXPOSITION ONLY */ + + +__attribute__((noinline)) +static void laog__init ( void ) +{ + tl_assert(!laog); + tl_assert(!laog_exposition); + + laog = VG_(newFM)( HG_(zalloc), "hg.laog__init.1", + HG_(free), NULL/*unboxedcmp*/ ); + + laog_exposition = VG_(newFM)( HG_(zalloc), "hg.laog__init.2", HG_(free), + cmp_LAOGLinkExposition ); + tl_assert(laog); + tl_assert(laog_exposition); +} + +static void laog__show ( Char* who ) { + Word i, ws_size; + UWord* ws_words; + Lock* me; + LAOGLinks* links; + VG_(printf)("laog (requested by %s) {\n", who); + VG_(initIterFM)( laog ); + me = NULL; + links = NULL; + while (VG_(nextIterFM)( laog, (Word*)&me, + (Word*)&links )) { + tl_assert(me); + tl_assert(links); + VG_(printf)(" node %p:\n", me); + HG_(getPayloadWS)( &ws_words, &ws_size, univ_laog, links->inns ); + for (i = 0; i < ws_size; i++) + VG_(printf)(" inn %#lx\n", ws_words[i] ); + HG_(getPayloadWS)( &ws_words, &ws_size, univ_laog, links->outs ); + for (i = 0; i < ws_size; i++) + VG_(printf)(" out %#lx\n", ws_words[i] ); + me = NULL; + links = NULL; + } + VG_(doneIterFM)( laog ); + VG_(printf)("}\n"); +} + +__attribute__((noinline)) +static void laog__add_edge ( Lock* src, Lock* dst ) { + Word keyW; + LAOGLinks* links; + Bool presentF, presentR; + if (0) VG_(printf)("laog__add_edge %p %p\n", src, dst); + + /* Take the opportunity to sanity check the graph. Record in + presentF if there is already a src->dst mapping in this node's + forwards links, and presentR if there is already a src->dst + mapping in this node's backwards links. They should agree! + Also, we need to know whether the edge was already present so as + to decide whether or not to update the link details mapping. We + can compute presentF and presentR essentially for free, so may + as well do this always. 
*/ + presentF = presentR = False; + + /* Update the out edges for src */ + keyW = 0; + links = NULL; + if (VG_(lookupFM)( laog, &keyW, (Word*)&links, (Word)src )) { + WordSetID outs_new; + tl_assert(links); + tl_assert(keyW == (Word)src); + outs_new = HG_(addToWS)( univ_laog, links->outs, (Word)dst ); + presentF = outs_new == links->outs; + links->outs = outs_new; + } else { + links = HG_(zalloc)("hg.lae.1", sizeof(LAOGLinks)); + links->inns = HG_(emptyWS)( univ_laog ); + links->outs = HG_(singletonWS)( univ_laog, (Word)dst ); + VG_(addToFM)( laog, (Word)src, (Word)links ); + } + /* Update the in edges for dst */ + keyW = 0; + links = NULL; + if (VG_(lookupFM)( laog, &keyW, (Word*)&links, (Word)dst )) { + WordSetID inns_new; + tl_assert(links); + tl_assert(keyW == (Word)dst); + inns_new = HG_(addToWS)( univ_laog, links->inns, (Word)src ); + presentR = inns_new == links->inns; + links->inns = inns_new; + } else { + links = HG_(zalloc)("hg.lae.2", sizeof(LAOGLinks)); + links->inns = HG_(singletonWS)( univ_laog, (Word)src ); + links->outs = HG_(emptyWS)( univ_laog ); + VG_(addToFM)( laog, (Word)dst, (Word)links ); + } + + tl_assert( (presentF && presentR) || (!presentF && !presentR) ); + + if (!presentF && src->acquired_at && dst->acquired_at) { + LAOGLinkExposition expo; + /* If this edge is entering the graph, and we have acquired_at + information for both src and dst, record those acquisition + points. Hence, if there is later a violation of this + ordering, we can show the user the two places in which the + required src-dst ordering was previously established. */ + if (0) VG_(printf)("acquire edge %#lx %#lx\n", + src->guestaddr, dst->guestaddr); + expo.src_ga = src->guestaddr; + expo.dst_ga = dst->guestaddr; + expo.src_ec = NULL; + expo.dst_ec = NULL; + tl_assert(laog_exposition); + if (VG_(lookupFM)( laog_exposition, NULL, NULL, (Word)&expo )) { + /* we already have it; do nothing */ + } else { + LAOGLinkExposition* expo2 = HG_(zalloc)("hg.lae.3", + sizeof(LAOGLinkExposition)); + expo2->src_ga = src->guestaddr; + expo2->dst_ga = dst->guestaddr; + expo2->src_ec = src->acquired_at; + expo2->dst_ec = dst->acquired_at; + VG_(addToFM)( laog_exposition, (Word)expo2, (Word)NULL ); + } + } +} + +__attribute__((noinline)) +static void laog__del_edge ( Lock* src, Lock* dst ) { + Word keyW; + LAOGLinks* links; + if (0) VG_(printf)("laog__del_edge %p %p\n", src, dst); + /* Update the out edges for src */ + keyW = 0; + links = NULL; + if (VG_(lookupFM)( laog, &keyW, (Word*)&links, (Word)src )) { + tl_assert(links); + tl_assert(keyW == (Word)src); + links->outs = HG_(delFromWS)( univ_laog, links->outs, (Word)dst ); + } + /* Update the in edges for dst */ + keyW = 0; + links = NULL; + if (VG_(lookupFM)( laog, &keyW, (Word*)&links, (Word)dst )) { + tl_assert(links); + tl_assert(keyW == (Word)dst); + links->inns = HG_(delFromWS)( univ_laog, links->inns, (Word)src ); + } +} + +__attribute__((noinline)) +static WordSetID /* in univ_laog */ laog__succs ( Lock* lk ) { + Word keyW; + LAOGLinks* links; + keyW = 0; + links = NULL; + if (VG_(lookupFM)( laog, &keyW, (Word*)&links, (Word)lk )) { + tl_assert(links); + tl_assert(keyW == (Word)lk); + return links->outs; + } else { + return HG_(emptyWS)( univ_laog ); + } +} + +__attribute__((noinline)) +static WordSetID /* in univ_laog */ laog__preds ( Lock* lk ) { + Word keyW; + LAOGLinks* links; + keyW = 0; + links = NULL; + if (VG_(lookupFM)( laog, &keyW, (Word*)&links, (Word)lk )) { + tl_assert(links); + tl_assert(keyW == (Word)lk); + return links->inns; + } else 
{ + return HG_(emptyWS)( univ_laog ); + } +} + +__attribute__((noinline)) +static void laog__sanity_check ( Char* who ) { + Word i, ws_size; + UWord* ws_words; + Lock* me; + LAOGLinks* links; + if (UNLIKELY(!laog || !laog_exposition)) + laog__init(); + VG_(initIterFM)( laog ); + me = NULL; + links = NULL; + if (0) VG_(printf)("laog sanity check\n"); + while (VG_(nextIterFM)( laog, (Word*)&me, + (Word*)&links )) { + tl_assert(me); + tl_assert(links); + HG_(getPayloadWS)( &ws_words, &ws_size, univ_laog, links->inns ); + for (i = 0; i < ws_size; i++) { + if ( ! HG_(elemWS)( univ_laog, + laog__succs( (Lock*)ws_words[i] ), + (Word)me )) + goto bad; + } + HG_(getPayloadWS)( &ws_words, &ws_size, univ_laog, links->outs ); + for (i = 0; i < ws_size; i++) { + if ( ! HG_(elemWS)( univ_laog, + laog__preds( (Lock*)ws_words[i] ), + (Word)me )) + goto bad; + } + me = NULL; + links = NULL; + } + VG_(doneIterFM)( laog ); + return; + + bad: + VG_(printf)("laog__sanity_check(%s) FAILED\n", who); + laog__show(who); + tl_assert(0); +} + +/* If there is a path in laog from 'src' to any of the elements in + 'dst', return an arbitrarily chosen element of 'dst' reachable from + 'src'. If no path exist from 'src' to any element in 'dst', return + NULL. */ +__attribute__((noinline)) +static +Lock* laog__do_dfs_from_to ( Lock* src, WordSetID dsts /* univ_lsets */ ) +{ + Lock* ret; + Word i, ssz; + XArray* stack; /* of Lock* */ + WordFM* visited; /* Lock* -> void, iow, Set(Lock*) */ + Lock* here; + WordSetID succs; + Word succs_size; + UWord* succs_words; + //laog__sanity_check(); + + /* If the destination set is empty, we can never get there from + 'src' :-), so don't bother to try */ + if (HG_(isEmptyWS)( univ_lsets, dsts )) + return NULL; + + ret = NULL; + stack = VG_(newXA)( HG_(zalloc), "hg.lddft.1", HG_(free), sizeof(Lock*) ); + visited = VG_(newFM)( HG_(zalloc), "hg.lddft.2", HG_(free), NULL/*unboxedcmp*/ ); + + (void) VG_(addToXA)( stack, &src ); + + while (True) { + + ssz = VG_(sizeXA)( stack ); + + if (ssz == 0) { ret = NULL; break; } + + here = *(Lock**) VG_(indexXA)( stack, ssz-1 ); + VG_(dropTailXA)( stack, 1 ); + + if (HG_(elemWS)( univ_lsets, dsts, (Word)here )) { ret = here; break; } + + if (VG_(lookupFM)( visited, NULL, NULL, (Word)here )) + continue; + + VG_(addToFM)( visited, (Word)here, 0 ); + + succs = laog__succs( here ); + HG_(getPayloadWS)( &succs_words, &succs_size, univ_laog, succs ); + for (i = 0; i < succs_size; i++) + (void) VG_(addToXA)( stack, &succs_words[i] ); + } + + VG_(deleteFM)( visited, NULL, NULL ); + VG_(deleteXA)( stack ); + return ret; +} + + +/* Thread 'thr' is acquiring 'lk'. Check for inconsistent ordering + between 'lk' and the locks already held by 'thr' and issue a + complaint if so. Also, update the ordering graph appropriately. +*/ +__attribute__((noinline)) +static void laog__pre_thread_acquires_lock ( + Thread* thr, /* NB: BEFORE lock is added */ + Lock* lk + ) +{ + UWord* ls_words; + Word ls_size, i; + Lock* other; + + /* It may be that 'thr' already holds 'lk' and is recursively + relocking in. In this case we just ignore the call. */ + /* NB: univ_lsets really is correct here */ + if (HG_(elemWS)( univ_lsets, thr->locksetA, (Word)lk )) + return; + + if (UNLIKELY(!laog || !laog_exposition)) + laog__init(); + + /* First, the check. 
Complain if there is any path in laog from lk + to any of the locks already held by thr, since if any such path + existed, it would mean that previously lk was acquired before + (rather than after, as we are doing here) at least one of those + locks. + */ + other = laog__do_dfs_from_to(lk, thr->locksetA); + if (other) { + LAOGLinkExposition key, *found; + /* So we managed to find a path lk --*--> other in the graph, + which implies that 'lk' should have been acquired before + 'other' but is in fact being acquired afterwards. We present + the lk/other arguments to record_error_LockOrder in the order + in which they should have been acquired. */ + /* Go look in the laog_exposition mapping, to find the allocation + points for this edge, so we can show the user. */ + key.src_ga = lk->guestaddr; + key.dst_ga = other->guestaddr; + key.src_ec = NULL; + key.dst_ec = NULL; + found = NULL; + if (VG_(lookupFM)( laog_exposition, + (Word*)&found, NULL, (Word)&key )) { + tl_assert(found != &key); + tl_assert(found->src_ga == key.src_ga); + tl_assert(found->dst_ga == key.dst_ga); + tl_assert(found->src_ec); + tl_assert(found->dst_ec); + HG_(record_error_LockOrder)( + thr, lk->guestaddr, other->guestaddr, + found->src_ec, found->dst_ec ); + } else { + /* Hmm. This can't happen (can it?) */ + HG_(record_error_LockOrder)( + thr, lk->guestaddr, other->guestaddr, + NULL, NULL ); + } + } + + /* Second, add to laog the pairs + (old, lk) | old <- locks already held by thr + Since both old and lk are currently held by thr, their acquired_at + fields must be non-NULL. + */ + tl_assert(lk->acquired_at); + HG_(getPayloadWS)( &ls_words, &ls_size, univ_lsets, thr->locksetA ); + for (i = 0; i < ls_size; i++) { + Lock* old = (Lock*)ls_words[i]; + tl_assert(old->acquired_at); + laog__add_edge( old, lk ); + } + + /* Why "except_Locks" ? We're here because a lock is being + acquired by a thread, and we're in an inconsistent state here. + See the call points in evhH__post_thread_{r,w}_acquires_lock. + When called in this inconsistent state, locks__sanity_check duly + barfs. */ + if (HG_(clo_sanity_flags) & SCE_LAOG) + all_except_Locks__sanity_check("laog__pre_thread_acquires_lock-post"); +} + + +/* Delete from 'laog' any pair mentioning a lock in locksToDelete */ + +__attribute__((noinline)) +static void laog__handle_one_lock_deletion ( Lock* lk ) +{ + WordSetID preds, succs; + Word preds_size, succs_size, i, j; + UWord *preds_words, *succs_words; + + if (UNLIKELY(!laog || !laog_exposition)) + laog__init(); + + preds = laog__preds( lk ); + succs = laog__succs( lk ); + + HG_(getPayloadWS)( &preds_words, &preds_size, univ_laog, preds ); + for (i = 0; i < preds_size; i++) + laog__del_edge( (Lock*)preds_words[i], lk ); + + HG_(getPayloadWS)( &succs_words, &succs_size, univ_laog, succs ); + for (j = 0; j < succs_size; j++) + laog__del_edge( lk, (Lock*)succs_words[j] ); + + for (i = 0; i < preds_size; i++) { + for (j = 0; j < succs_size; j++) { + if (preds_words[i] != succs_words[j]) { + /* This can pass unlocked locks to laog__add_edge, since + we're deleting stuff. So their acquired_at fields may + be NULL. 
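+            For illustration only: if the graph currently holds A -> lk
+            and lk -> B, this loop adds A -> B, so the ordering
+            information implied via the deleted node is not lost.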
*/ + laog__add_edge( (Lock*)preds_words[i], (Lock*)succs_words[j] ); + } + } + } +} + +//__attribute__((noinline)) +//static void laog__handle_lock_deletions ( +// WordSetID /* in univ_laog */ locksToDelete +// ) +//{ +// Word i, ws_size; +// UWord* ws_words; +// +// if (UNLIKELY(!laog || !laog_exposition)) +// laog__init(); +// +// HG_(getPayloadWS)( &ws_words, &ws_size, univ_lsets, locksToDelete ); +// for (i = 0; i < ws_size; i++) +// laog__handle_one_lock_deletion( (Lock*)ws_words[i] ); +// +// if (HG_(clo_sanity_flags) & SCE_LAOG) +// all__sanity_check("laog__handle_lock_deletions-post"); +//} + + +/*--------------------------------------------------------------*/ +/*--- Malloc/free replacements ---*/ +/*--------------------------------------------------------------*/ + +typedef + struct { + void* next; /* required by m_hashtable */ + Addr payload; /* ptr to actual block */ + SizeT szB; /* size requested */ + ExeContext* where; /* where it was allocated */ + Thread* thr; /* allocating thread */ + } + MallocMeta; + +/* A hash table of MallocMetas, used to track malloc'd blocks + (obviously). */ +static VgHashTable hg_mallocmeta_table = NULL; + + +static MallocMeta* new_MallocMeta ( void ) { + MallocMeta* md = HG_(zalloc)( "hg.new_MallocMeta.1", sizeof(MallocMeta) ); + tl_assert(md); + return md; +} +static void delete_MallocMeta ( MallocMeta* md ) { + HG_(free)(md); +} + + +/* Allocate a client block and set up the metadata for it. */ + +static +void* handle_alloc ( ThreadId tid, + SizeT szB, SizeT alignB, Bool is_zeroed ) +{ + Addr p; + MallocMeta* md; + + tl_assert( ((SSizeT)szB) >= 0 ); + p = (Addr)VG_(cli_malloc)(alignB, szB); + if (!p) { + return NULL; + } + if (is_zeroed) + VG_(memset)((void*)p, 0, szB); + + /* Note that map_threads_lookup must succeed (cannot assert), since + memory can only be allocated by currently alive threads, hence + they must have an entry in map_threads. */ + md = new_MallocMeta(); + md->payload = p; + md->szB = szB; + md->where = VG_(record_ExeContext)( tid, 0 ); + md->thr = map_threads_lookup( tid ); + + VG_(HT_add_node)( hg_mallocmeta_table, (VgHashNode*)md ); + + /* Tell the lower level memory wranglers. */ + evh__new_mem_heap( p, szB, is_zeroed ); + + return (void*)p; +} + +/* Re the checks for less-than-zero (also in hg_cli__realloc below): + Cast to a signed type to catch any unexpectedly negative args. + We're assuming here that the size asked for is not greater than + 2^31 bytes (for 32-bit platforms) or 2^63 bytes (for 64-bit + platforms). */ +static void* hg_cli__malloc ( ThreadId tid, SizeT n ) { + if (((SSizeT)n) < 0) return NULL; + return handle_alloc ( tid, n, VG_(clo_alignment), + /*is_zeroed*/False ); +} +static void* hg_cli____builtin_new ( ThreadId tid, SizeT n ) { + if (((SSizeT)n) < 0) return NULL; + return handle_alloc ( tid, n, VG_(clo_alignment), + /*is_zeroed*/False ); +} +static void* hg_cli____builtin_vec_new ( ThreadId tid, SizeT n ) { + if (((SSizeT)n) < 0) return NULL; + return handle_alloc ( tid, n, VG_(clo_alignment), + /*is_zeroed*/False ); +} +static void* hg_cli__memalign ( ThreadId tid, SizeT align, SizeT n ) { + if (((SSizeT)n) < 0) return NULL; + return handle_alloc ( tid, n, align, + /*is_zeroed*/False ); +} +static void* hg_cli__calloc ( ThreadId tid, SizeT nmemb, SizeT size1 ) { + if ( ((SSizeT)nmemb) < 0 || ((SSizeT)size1) < 0 ) return NULL; + return handle_alloc ( tid, nmemb*size1, VG_(clo_alignment), + /*is_zeroed*/True ); +} + + +/* Free a client block, including getting rid of the relevant + metadata. 
*/ + +static void handle_free ( ThreadId tid, void* p ) +{ + MallocMeta *md, *old_md; + SizeT szB; + + /* First see if we can find the metadata for 'p'. */ + md = (MallocMeta*) VG_(HT_lookup)( hg_mallocmeta_table, (UWord)p ); + if (!md) + return; /* apparently freeing a bogus address. Oh well. */ + + tl_assert(md->payload == (Addr)p); + szB = md->szB; + + /* Nuke the metadata block */ + old_md = (MallocMeta*) + VG_(HT_remove)( hg_mallocmeta_table, (UWord)p ); + tl_assert(old_md); /* it must be present - we just found it */ + tl_assert(old_md == md); + tl_assert(old_md->payload == (Addr)p); + + VG_(cli_free)((void*)old_md->payload); + delete_MallocMeta(old_md); + + /* Tell the lower level memory wranglers. */ + evh__die_mem_heap( (Addr)p, szB ); +} + +static void hg_cli__free ( ThreadId tid, void* p ) { + handle_free(tid, p); +} +static void hg_cli____builtin_delete ( ThreadId tid, void* p ) { + handle_free(tid, p); +} +static void hg_cli____builtin_vec_delete ( ThreadId tid, void* p ) { + handle_free(tid, p); +} + + +static void* hg_cli__realloc ( ThreadId tid, void* payloadV, SizeT new_size ) +{ + MallocMeta *md, *md_new, *md_tmp; + SizeT i; + + Addr payload = (Addr)payloadV; + + if (((SSizeT)new_size) < 0) return NULL; + + md = (MallocMeta*) VG_(HT_lookup)( hg_mallocmeta_table, (UWord)payload ); + if (!md) + return NULL; /* apparently realloc-ing a bogus address. Oh well. */ + + tl_assert(md->payload == payload); + + if (md->szB == new_size) { + /* size unchanged */ + md->where = VG_(record_ExeContext)(tid, 0); + return payloadV; + } + + if (md->szB > new_size) { + /* new size is smaller */ + md->szB = new_size; + md->where = VG_(record_ExeContext)(tid, 0); + evh__die_mem_heap( md->payload + new_size, md->szB - new_size ); + return payloadV; + } + + /* else */ { + /* new size is bigger */ + Addr p_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_size); + + /* First half kept and copied, second half new */ + // FIXME: shouldn't we use a copier which implements the + // memory state machine? + shadow_mem_copy_range( payload, p_new, md->szB ); + evh__new_mem_heap ( p_new + md->szB, new_size - md->szB, + /*inited*/False ); + /* FIXME: can anything funny happen here? specifically, if the + old range contained a lock, then die_mem_heap will complain. + Is that the correct behaviour? Not sure. */ + evh__die_mem_heap( payload, md->szB ); + + /* Copy from old to new */ + for (i = 0; i < md->szB; i++) + ((UChar*)p_new)[i] = ((UChar*)payload)[i]; + + /* Because the metadata hash table is index by payload address, + we have to get rid of the old hash table entry and make a new + one. We can't just modify the existing metadata in place, + because then it would (almost certainly) be in the wrong hash + chain. */ + md_new = new_MallocMeta(); + *md_new = *md; + + md_tmp = VG_(HT_remove)( hg_mallocmeta_table, payload ); + tl_assert(md_tmp); + tl_assert(md_tmp == md); + + VG_(cli_free)((void*)md->payload); + delete_MallocMeta(md); + + /* Update fields */ + md_new->where = VG_(record_ExeContext)( tid, 0 ); + md_new->szB = new_size; + md_new->payload = p_new; + md_new->thr = map_threads_lookup( tid ); + + /* and add */ + VG_(HT_add_node)( hg_mallocmeta_table, (VgHashNode*)md_new ); + + return (void*)p_new; + } +} + +static SizeT hg_cli_malloc_usable_size ( ThreadId tid, void* p ) +{ + MallocMeta *md = VG_(HT_lookup)( hg_mallocmeta_table, (UWord)p ); + + // There may be slop, but pretend there isn't because only the asked-for + // area will have been shadowed properly. + return ( md ? 
md->szB : 0 ); +} + + +/*--------------------------------------------------------------*/ +/*--- Instrumentation ---*/ +/*--------------------------------------------------------------*/ + +static void instrument_mem_access ( IRSB* bbOut, + IRExpr* addr, + Int szB, + Bool isStore, + Int hWordTy_szB ) +{ + IRType tyAddr = Ity_INVALID; + HChar* hName = NULL; + void* hAddr = NULL; + Int regparms = 0; + IRExpr** argv = NULL; + IRDirty* di = NULL; + + tl_assert(isIRAtom(addr)); + tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8); + + tyAddr = typeOfIRExpr( bbOut->tyenv, addr ); + tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); + + /* So the effective address is in 'addr' now. */ + regparms = 1; // unless stated otherwise + if (isStore) { + switch (szB) { + case 1: + hName = "evh__mem_help_write_1"; + hAddr = &evh__mem_help_write_1; + argv = mkIRExprVec_1( addr ); + break; + case 2: + hName = "evh__mem_help_write_2"; + hAddr = &evh__mem_help_write_2; + argv = mkIRExprVec_1( addr ); + break; + case 4: + hName = "evh__mem_help_write_4"; + hAddr = &evh__mem_help_write_4; + argv = mkIRExprVec_1( addr ); + break; + case 8: + hName = "evh__mem_help_write_8"; + hAddr = &evh__mem_help_write_8; + argv = mkIRExprVec_1( addr ); + break; + default: + tl_assert(szB > 8 && szB <= 512); /* stay sane */ + regparms = 2; + hName = "evh__mem_help_write_N"; + hAddr = &evh__mem_help_write_N; + argv = mkIRExprVec_2( addr, mkIRExpr_HWord( szB )); + break; + } + } else { + switch (szB) { + case 1: + hName = "evh__mem_help_read_1"; + hAddr = &evh__mem_help_read_1; + argv = mkIRExprVec_1( addr ); + break; + case 2: + hName = "evh__mem_help_read_2"; + hAddr = &evh__mem_help_read_2; + argv = mkIRExprVec_1( addr ); + break; + case 4: + hName = "evh__mem_help_read_4"; + hAddr = &evh__mem_help_read_4; + argv = mkIRExprVec_1( addr ); + break; + case 8: + hName = "evh__mem_help_read_8"; + hAddr = &evh__mem_help_read_8; + argv = mkIRExprVec_1( addr ); + break; + default: + tl_assert(szB > 8 && szB <= 512); /* stay sane */ + regparms = 2; + hName = "evh__mem_help_read_N"; + hAddr = &evh__mem_help_read_N; + argv = mkIRExprVec_2( addr, mkIRExpr_HWord( szB )); + break; + } + } + + /* Add the helper. */ + tl_assert(hName); + tl_assert(hAddr); + tl_assert(argv); + di = unsafeIRDirty_0_N( regparms, + hName, VG_(fnptr_to_fnentry)( hAddr ), + argv ); + addStmtToIRSB( bbOut, IRStmt_Dirty(di) ); +} + + +//static void instrument_memory_bus_event ( IRSB* bbOut, IRMBusEvent event ) +//{ +// switch (event) { +// case Imbe_SnoopedStoreBegin: +// case Imbe_SnoopedStoreEnd: +// /* These arise from ppc stwcx. insns. They should perhaps be +// handled better. */ +// break; +// case Imbe_Fence: +// break; /* not interesting */ +// case Imbe_BusLock: +// case Imbe_BusUnlock: +// addStmtToIRSB( +// bbOut, +// IRStmt_Dirty( +// unsafeIRDirty_0_N( +// 0/*regparms*/, +// event == Imbe_BusLock ? "evh__bus_lock" +// : "evh__bus_unlock", +// VG_(fnptr_to_fnentry)( +// event == Imbe_BusLock ? &evh__bus_lock +// : &evh__bus_unlock +// ), +// mkIRExprVec_0() +// ) +// ) +// ); +// break; +// default: +// tl_assert(0); +// } +//} + + +static +IRSB* hg_instrument ( VgCallbackClosure* closure, + IRSB* bbIn, + VexGuestLayout* layout, + VexGuestExtents* vge, + IRType gWordTy, IRType hWordTy ) +{ + Int i; + IRSB* bbOut; + Bool x86busLocked = False; + Bool isSnoopedStore = False; + + if (gWordTy != hWordTy) { + /* We don't currently support this case. 
*/ + VG_(tool_panic)("host/guest word size mismatch"); + } + + /* Set up BB */ + bbOut = emptyIRSB(); + bbOut->tyenv = deepCopyIRTypeEnv(bbIn->tyenv); + bbOut->next = deepCopyIRExpr(bbIn->next); + bbOut->jumpkind = bbIn->jumpkind; + + // Copy verbatim any IR preamble preceding the first IMark + i = 0; + while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) { + addStmtToIRSB( bbOut, bbIn->stmts[i] ); + i++; + } + + for (/*use current i*/; i < bbIn->stmts_used; i++) { + IRStmt* st = bbIn->stmts[i]; + tl_assert(st); + tl_assert(isFlatIRStmt(st)); + switch (st->tag) { + case Ist_NoOp: + case Ist_AbiHint: + case Ist_Put: + case Ist_PutI: + case Ist_IMark: + case Ist_Exit: + /* None of these can contain any memory references. */ + break; + + case Ist_MBE: + //instrument_memory_bus_event( bbOut, st->Ist.MBE.event ); + switch (st->Ist.MBE.event) { + case Imbe_Fence: + break; /* not interesting */ + /* Imbe_Bus{Lock,Unlock} arise from x86/amd64 LOCK + prefixed instructions. */ + case Imbe_BusLock: + tl_assert(x86busLocked == False); + x86busLocked = True; + break; + case Imbe_BusUnlock: + tl_assert(x86busLocked == True); + x86busLocked = False; + break; + /* Imbe_SnoopedStore{Begin,End} arise from ppc + stwcx. instructions. */ + case Imbe_SnoopedStoreBegin: + tl_assert(isSnoopedStore == False); + isSnoopedStore = True; + break; + case Imbe_SnoopedStoreEnd: + tl_assert(isSnoopedStore == True); + isSnoopedStore = False; + break; + default: + goto unhandled; + } + break; + + case Ist_Store: + if (!x86busLocked && !isSnoopedStore) + instrument_mem_access( + bbOut, + st->Ist.Store.addr, + sizeofIRType(typeOfIRExpr(bbIn->tyenv, st->Ist.Store.data)), + True/*isStore*/, + sizeofIRType(hWordTy) + ); + break; + + case Ist_WrTmp: { + IRExpr* data = st->Ist.WrTmp.data; + if (data->tag == Iex_Load) { + instrument_mem_access( + bbOut, + data->Iex.Load.addr, + sizeofIRType(data->Iex.Load.ty), + False/*!isStore*/, + sizeofIRType(hWordTy) + ); + } + break; + } + + case Ist_Dirty: { + Int dataSize; + IRDirty* d = st->Ist.Dirty.details; + if (d->mFx != Ifx_None) { + /* This dirty helper accesses memory. Collect the + details. */ + tl_assert(d->mAddr != NULL); + tl_assert(d->mSize != 0); + dataSize = d->mSize; + if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { + instrument_mem_access( + bbOut, d->mAddr, dataSize, False/*!isStore*/, + sizeofIRType(hWordTy) + ); + } + /* This isn't really correct. Really the + instrumentation should be only added when + (!x86busLocked && !isSnoopedStore), just like with + Ist_Store. Still, I don't think this is + particularly important. */ + if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { + instrument_mem_access( + bbOut, d->mAddr, dataSize, True/*isStore*/, + sizeofIRType(hWordTy) + ); + } + } else { + tl_assert(d->mAddr == NULL); + tl_assert(d->mSize == 0); + } + break; + } + + default: + unhandled: + ppIRStmt(st); + tl_assert(0); + + } /* switch (st->tag) */ + + addStmtToIRSB( bbOut, st ); + } /* iterate over bbIn->stmts */ + + return bbOut; +} + + +/*----------------------------------------------------------------*/ +/*--- Client requests ---*/ +/*----------------------------------------------------------------*/ + +/* Sheesh. Yet another goddam finite map. 
*/ +static WordFM* map_pthread_t_to_Thread = NULL; /* pthread_t -> Thread* */ + +static void map_pthread_t_to_Thread_INIT ( void ) { + if (UNLIKELY(map_pthread_t_to_Thread == NULL)) { + map_pthread_t_to_Thread = VG_(newFM)( HG_(zalloc), "hg.mpttT.1", + HG_(free), NULL ); + tl_assert(map_pthread_t_to_Thread != NULL); + } +} + + +static +Bool hg_handle_client_request ( ThreadId tid, UWord* args, UWord* ret) +{ + if (!VG_IS_TOOL_USERREQ('H','G',args[0])) + return False; + + /* Anything that gets past the above check is one of ours, so we + should be able to handle it. */ + + /* default, meaningless return value, unless otherwise set */ + *ret = 0; + + switch (args[0]) { + + /* --- --- User-visible client requests --- --- */ + + case VG_USERREQ__HG_CLEAN_MEMORY: + if (0) VG_(printf)("VG_USERREQ__HG_CLEAN_MEMORY(%#lx,%ld)\n", + args[1], args[2]); + /* Call die_mem to (expensively) tidy up properly, if there + are any held locks etc in the area. Calling evh__die_mem + and then evh__new_mem is a bit inefficient; probably just + the latter would do. */ + if (args[2] > 0) { /* length */ + evh__die_mem(args[1], args[2]); + /* and then set it to New */ + evh__new_mem(args[1], args[2]); + } + break; + + /* --- --- Client requests for Helgrind's use only --- --- */ + + /* Some thread is telling us its pthread_t value. Record the + binding between that and the associated Thread*, so we can + later find the Thread* again when notified of a join by the + thread. */ + case _VG_USERREQ__HG_SET_MY_PTHREAD_T: { + Thread* my_thr = NULL; + if (0) + VG_(printf)("SET_MY_PTHREAD_T (tid %d): pthread_t = %p\n", (Int)tid, + (void*)args[1]); + map_pthread_t_to_Thread_INIT(); + my_thr = map_threads_maybe_lookup( tid ); + /* This assertion should hold because the map_threads (tid to + Thread*) binding should have been made at the point of + low-level creation of this thread, which should have + happened prior to us getting this client request for it. + That's because this client request is sent from + client-world from the 'thread_wrapper' function, which + only runs once the thread has been low-level created. */ + tl_assert(my_thr != NULL); + /* So now we know that (pthread_t)args[1] is associated with + (Thread*)my_thr. Note that down. */ + if (0) + VG_(printf)("XXXX: bind pthread_t %p to Thread* %p\n", + (void*)args[1], (void*)my_thr ); + VG_(addToFM)( map_pthread_t_to_Thread, (Word)args[1], (Word)my_thr ); + break; + } + + case _VG_USERREQ__HG_PTH_API_ERROR: { + Thread* my_thr = NULL; + map_pthread_t_to_Thread_INIT(); + my_thr = map_threads_maybe_lookup( tid ); + tl_assert(my_thr); /* See justification above in SET_MY_PTHREAD_T */ + HG_(record_error_PthAPIerror)( + my_thr, (HChar*)args[1], (Word)args[2], (HChar*)args[3] ); + break; + } + + /* This thread (tid) has completed a join with the quitting + thread whose pthread_t is in args[1]. */ + case _VG_USERREQ__HG_PTHREAD_JOIN_POST: { + Thread* thr_q = NULL; /* quitter Thread* */ + Bool found = False; + if (0) + VG_(printf)("NOTIFY_JOIN_COMPLETE (tid %d): quitter = %p\n", (Int)tid, + (void*)args[1]); + map_pthread_t_to_Thread_INIT(); + found = VG_(lookupFM)( map_pthread_t_to_Thread, + NULL, (Word*)&thr_q, (Word)args[1] ); + /* Can this fail? It would mean that our pthread_join + wrapper observed a successful join on args[1] yet that + thread never existed (or at least, it never lodged an + entry in the mapping (via SET_MY_PTHREAD_T)). Which + sounds like a bug in the threads library. 
*/ + // FIXME: get rid of this assertion; handle properly + tl_assert(found); + if (found) { + if (0) + VG_(printf)(".................... quitter Thread* = %p\n", + thr_q); + evh__HG_PTHREAD_JOIN_POST( tid, thr_q ); + } + break; + } + + /* EXPOSITION only: by intercepting lock init events we can show + the user where the lock was initialised, rather than only + being able to show where it was first locked. Intercepting + lock initialisations is not necessary for the basic operation + of the race checker. */ + case _VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST: + evh__HG_PTHREAD_MUTEX_INIT_POST( tid, (void*)args[1], args[2] ); + break; + + case _VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE: + evh__HG_PTHREAD_MUTEX_DESTROY_PRE( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE: // pth_mx_t* + evh__HG_PTHREAD_MUTEX_UNLOCK_PRE( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_POST: // pth_mx_t* + evh__HG_PTHREAD_MUTEX_UNLOCK_POST( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE: // pth_mx_t*, Word + evh__HG_PTHREAD_MUTEX_LOCK_PRE( tid, (void*)args[1], args[2] ); + break; + + case _VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST: // pth_mx_t* + evh__HG_PTHREAD_MUTEX_LOCK_POST( tid, (void*)args[1] ); + break; + + /* This thread is about to do pthread_cond_signal on the + pthread_cond_t* in arg[1]. Ditto pthread_cond_broadcast. */ + case _VG_USERREQ__HG_PTHREAD_COND_SIGNAL_PRE: + case _VG_USERREQ__HG_PTHREAD_COND_BROADCAST_PRE: + evh__HG_PTHREAD_COND_SIGNAL_PRE( tid, (void*)args[1] ); + break; + + /* Entry into pthread_cond_wait, cond=arg[1], mutex=arg[2]. + Returns a flag indicating whether or not the mutex is believed to be + valid for this operation. */ + case _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE: { + Bool mutex_is_valid + = evh__HG_PTHREAD_COND_WAIT_PRE( tid, (void*)args[1], + (void*)args[2] ); + *ret = mutex_is_valid ? 
1 : 0; + break; + } + + /* cond=arg[1] */ + case _VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE: + evh__HG_PTHREAD_COND_DESTROY_PRE( tid, (void*)args[1] ); + break; + + /* Thread successfully completed pthread_cond_wait, cond=arg[1], + mutex=arg[2] */ + case _VG_USERREQ__HG_PTHREAD_COND_WAIT_POST: + evh__HG_PTHREAD_COND_WAIT_POST( tid, + (void*)args[1], (void*)args[2] ); + break; + + case _VG_USERREQ__HG_PTHREAD_RWLOCK_INIT_POST: + evh__HG_PTHREAD_RWLOCK_INIT_POST( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_RWLOCK_DESTROY_PRE: + evh__HG_PTHREAD_RWLOCK_DESTROY_PRE( tid, (void*)args[1] ); + break; + + /* rwlock=arg[1], isW=arg[2], isTryLock=arg[3] */ + case _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE: + evh__HG_PTHREAD_RWLOCK_LOCK_PRE( tid, (void*)args[1], + args[2], args[3] ); + break; + + /* rwlock=arg[1], isW=arg[2] */ + case _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST: + evh__HG_PTHREAD_RWLOCK_LOCK_POST( tid, (void*)args[1], args[2] ); + break; + + case _VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_PRE: + evh__HG_PTHREAD_RWLOCK_UNLOCK_PRE( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_POST: + evh__HG_PTHREAD_RWLOCK_UNLOCK_POST( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_POSIX_SEM_INIT_POST: /* sem_t*, unsigned long */ + evh__HG_POSIX_SEM_INIT_POST( tid, (void*)args[1], args[2] ); + break; + + case _VG_USERREQ__HG_POSIX_SEM_DESTROY_PRE: /* sem_t* */ + evh__HG_POSIX_SEM_DESTROY_PRE( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_POSIX_SEM_POST_PRE: /* sem_t* */ + evh__HG_POSIX_SEM_POST_PRE( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_POSIX_SEM_WAIT_POST: /* sem_t* */ + evh__HG_POSIX_SEM_WAIT_POST( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_BARRIER_INIT_PRE: + /* pth_bar_t*, ulong */ + evh__HG_PTHREAD_BARRIER_INIT_PRE( tid, (void*)args[1], args[2] ); + break; + + case _VG_USERREQ__HG_PTHREAD_BARRIER_WAIT_PRE: + /* pth_bar_t* */ + evh__HG_PTHREAD_BARRIER_WAIT_PRE( tid, (void*)args[1] ); + break; + + case _VG_USERREQ__HG_PTHREAD_BARRIER_DESTROY_PRE: + /* pth_bar_t* */ + evh__HG_PTHREAD_BARRIER_DESTROY_PRE( tid, (void*)args[1] ); + break; + + default: + /* Unhandled Helgrind client request! */ + tl_assert2(0, "unhandled Helgrind client request 0x%lx", + args[0]); + } + + return True; +} + + +/*----------------------------------------------------------------*/ +/*--- Setup ---*/ +/*----------------------------------------------------------------*/ + +static Bool hg_process_cmd_line_option ( Char* arg ) +{ + Char* tmp_str; + + if VG_BOOL_CLO(arg, "--track-lockorders", + HG_(clo_track_lockorders)) {} + else if VG_BOOL_CLO(arg, "--cmp-race-err-addrs", + HG_(clo_cmp_race_err_addrs)) {} + else if VG_BOOL_CLO(arg, "--show-conflicts", + HG_(clo_show_conflicts)) {} + + /* If you change the 10k/10mill limits, remember to also change + them in assertions at the top of event_map_maybe_GC. 
*/ + else if VG_BINT_CLO(arg, "--conflict-cache-size", + HG_(clo_conflict_cache_size), 10*1000, 10*1000*1000) {} + + /* "stuvwx" --> stuvwx (binary) */ + else if VG_STR_CLO(arg, "--hg-sanity-flags", tmp_str) { + Int j; + + if (6 != VG_(strlen)(tmp_str)) { + VG_(message)(Vg_UserMsg, + "--hg-sanity-flags argument must have 6 digits"); + return False; + } + for (j = 0; j < 6; j++) { + if ('0' == tmp_str[j]) { /* do nothing */ } + else if ('1' == tmp_str[j]) HG_(clo_sanity_flags) |= (1 << (6-1-j)); + else { + VG_(message)(Vg_UserMsg, "--hg-sanity-flags argument can " + "only contain 0s and 1s"); + return False; + } + } + if (0) VG_(printf)("XXX sanity flags: 0x%lx\n", HG_(clo_sanity_flags)); + } + + else + return VG_(replacement_malloc_process_cmd_line_option)(arg); + + return True; +} + +static void hg_print_usage ( void ) +{ + VG_(printf)( +" --track-lockorders=no|yes show lock ordering errors? [yes]\n" +" --show-conflicts=no|yes show both stack traces in a race? [yes]\n" +" --conflict-cache-size=N size of conflict history cache [1000000]\n" + ); + VG_(replacement_malloc_print_usage)(); +} + +static void hg_print_debug_usage ( void ) +{ + VG_(replacement_malloc_print_debug_usage)(); + VG_(printf)(" --cmp-race-err-addrs=no|yes are data addresses in " + "race errors significant? [no]\n"); + VG_(printf)(" --hg-sanity-flags=<XXXXXX> sanity check " + " at events (X = 0|1) [000000]\n"); + VG_(printf)(" --hg-sanity-flags values:\n"); + VG_(printf)(" 010000 after changes to " + "lock-order-acquisition-graph\n"); + VG_(printf)(" 001000 at memory accesses (NB: not currently used)\n"); + VG_(printf)(" 000100 at mem permission setting for " + "ranges >= %d bytes\n", SCE_BIGRANGE_T); + VG_(printf)(" 000010 at lock/unlock events\n"); + VG_(printf)(" 000001 at thread create/join events\n"); +} + +static void hg_post_clo_init ( void ) +{ +} + +static void hg_fini ( Int exitcode ) +{ + if (SHOW_DATA_STRUCTURES) + pp_everything( PP_ALL, "SK_(fini)" ); + if (HG_(clo_sanity_flags)) + all__sanity_check("SK_(fini)"); + + if (VG_(clo_verbosity) >= 2) { + + if (1) { + VG_(printf)("\n"); + HG_(ppWSUstats)( univ_tsets, "univ_tsets" ); + VG_(printf)("\n"); + HG_(ppWSUstats)( univ_lsets, "univ_lsets" ); + VG_(printf)("\n"); + HG_(ppWSUstats)( univ_laog, "univ_laog" ); + } + + //zz VG_(printf)("\n"); + //zz VG_(printf)(" hbefore: %'10lu queries\n", stats__hbefore_queries); + //zz VG_(printf)(" hbefore: %'10lu cache 0 hits\n", stats__hbefore_cache0s); + //zz VG_(printf)(" hbefore: %'10lu cache > 0 hits\n", stats__hbefore_cacheNs); + //zz VG_(printf)(" hbefore: %'10lu graph searches\n", stats__hbefore_gsearches); + //zz VG_(printf)(" hbefore: %'10lu of which slow\n", + //zz stats__hbefore_gsearches - stats__hbefore_gsearchFs); + //zz VG_(printf)(" hbefore: %'10lu stack high water mark\n", + //zz stats__hbefore_stk_hwm); + //zz VG_(printf)(" hbefore: %'10lu cache invals\n", stats__hbefore_invals); + //zz VG_(printf)(" hbefore: %'10lu probes\n", stats__hbefore_probes); + + VG_(printf)("\n"); + VG_(printf)(" locksets: %'8d unique lock sets\n", + (Int)HG_(cardinalityWSU)( univ_lsets )); + VG_(printf)(" threadsets: %'8d unique thread sets\n", + (Int)HG_(cardinalityWSU)( univ_tsets )); + VG_(printf)(" univ_laog: %'8d unique lock sets\n", + (Int)HG_(cardinalityWSU)( univ_laog )); + + //VG_(printf)("L(ast)L(ock) map: %'8lu inserts (%d map size)\n", + // stats__ga_LL_adds, + // (Int)(ga_to_lastlock ? 
VG_(sizeFM)( ga_to_lastlock ) : 0) ); + + VG_(printf)(" LockN-to-P map: %'8llu queries (%llu map size)\n", + HG_(stats__LockN_to_P_queries), + HG_(stats__LockN_to_P_get_map_size)() ); + + VG_(printf)("string table map: %'8llu queries (%llu map size)\n", + HG_(stats__string_table_queries), + HG_(stats__string_table_get_map_size)() ); + VG_(printf)(" LAOG: %'8d map size\n", + (Int)(laog ? VG_(sizeFM)( laog ) : 0)); + VG_(printf)(" LAOG exposition: %'8d map size\n", + (Int)(laog_exposition ? VG_(sizeFM)( laog_exposition ) : 0)); + VG_(printf)(" locks: %'8lu acquires, " + "%'lu releases\n", + stats__lockN_acquires, + stats__lockN_releases + ); + VG_(printf)(" sanity checks: %'8lu\n", stats__sanity_checks); + + VG_(printf)("\n"); + libhb_shutdown(True); + } +} + +/* FIXME: move these somewhere sane */ + +static +void for_libhb__get_stacktrace ( Thr* hbt, Addr* frames, UWord nRequest ) +{ + Thread* thr; + ThreadId tid; + UWord nActual; + tl_assert(hbt); + thr = libhb_get_Thr_opaque( hbt ); + tl_assert(thr); + tid = map_threads_maybe_reverse_lookup_SLOW(thr); + nActual = (UWord)VG_(get_StackTrace)( tid, frames, (UInt)nRequest, + NULL, NULL, 0 ); + tl_assert(nActual <= nRequest); + for (; nActual < nRequest; nActual++) + frames[nActual] = 0; +} + +static +ExeContext* for_libhb__get_EC ( Thr* hbt ) +{ + Thread* thr; + ThreadId tid; + ExeContext* ec; + tl_assert(hbt); + thr = libhb_get_Thr_opaque( hbt ); + tl_assert(thr); + tid = map_threads_maybe_reverse_lookup_SLOW(thr); + ec = VG_(record_ExeContext)( tid, 0 ); + return ec; +} + + +static void hg_pre_clo_init ( void ) +{ + Thr* hbthr_root; + VG_(details_name) ("Helgrind"); + VG_(details_version) (NULL); + VG_(details_description) ("a thread error detector"); + VG_(details_copyright_author)( + "Copyright (C) 2007-2009, and GNU GPL'd, by OpenWorks LLP et al."); + VG_(details_bug_reports_to) (VG_BUGS_TO); + VG_(details_avg_translation_sizeB) ( 200 ); + + VG_(basic_tool_funcs) (hg_post_clo_init, + hg_instrument, + hg_fini); + + VG_(needs_core_errors) (); + VG_(needs_tool_errors) (HG_(eq_Error), + HG_(pp_Error), + False,/*show TIDs for errors*/ + HG_(update_extra), + HG_(recognised_suppression), + HG_(read_extra_suppression_info), + HG_(error_matches_suppression), + HG_(get_error_name), + HG_(print_extra_suppression_info)); + + VG_(needs_command_line_options)(hg_process_cmd_line_option, + hg_print_usage, + hg_print_debug_usage); + VG_(needs_client_requests) (hg_handle_client_request); + + // FIXME? + //VG_(needs_sanity_checks) (hg_cheap_sanity_check, + // hg_expensive_sanity_check); + + VG_(needs_malloc_replacement) (hg_cli__malloc, + hg_cli____builtin_new, + hg_cli____builtin_vec_new, + hg_cli__memalign, + hg_cli__calloc, + hg_cli__free, + hg_cli____builtin_delete, + hg_cli____builtin_vec_delete, + hg_cli__realloc, + hg_cli_malloc_usable_size, + HG_CLI__MALLOC_REDZONE_SZB ); + + /* 21 Dec 08: disabled this; it mostly causes H to start more + slowly and use significantly more memory, without very often + providing useful results. The user can request to load this + information manually with --read-var-info=yes. 
*/ + if (0) VG_(needs_var_info)(); /* optional */ + + VG_(track_new_mem_startup) ( evh__new_mem_w_perms ); + VG_(track_new_mem_stack_signal)( evh__new_mem_w_tid ); + VG_(track_new_mem_brk) ( evh__new_mem_w_tid ); + VG_(track_new_mem_mmap) ( evh__new_mem_w_perms ); + VG_(track_new_mem_stack) ( evh__new_mem ); + + // FIXME: surely this isn't thread-aware + VG_(track_copy_mem_remap) ( shadow_mem_copy_range ); + + VG_(track_change_mem_mprotect) ( evh__set_perms ); + + VG_(track_die_mem_stack_signal)( evh__die_mem ); + VG_(track_die_mem_brk) ( evh__die_mem ); + VG_(track_die_mem_munmap) ( evh__die_mem ); + VG_(track_die_mem_stack) ( evh__die_mem ); + + // FIXME: what is this for? + VG_(track_ban_mem_stack) (NULL); + + VG_(track_pre_mem_read) ( evh__pre_mem_read ); + VG_(track_pre_mem_read_asciiz) ( evh__pre_mem_read_asciiz ); + VG_(track_pre_mem_write) ( evh__pre_mem_write ); + VG_(track_post_mem_write) (NULL); + + ///////////////// + + VG_(track_pre_thread_ll_create)( evh__pre_thread_ll_create ); + VG_(track_pre_thread_ll_exit) ( evh__pre_thread_ll_exit ); + + VG_(track_start_client_code)( evh__start_client_code ); + VG_(track_stop_client_code)( evh__stop_client_code ); + + ///////////////////////////////////////////// + hbthr_root = libhb_init( for_libhb__get_stacktrace, + for_libhb__get_EC ); + ///////////////////////////////////////////// + + initialise_data_structures(hbthr_root); + + /* Ensure that requirements for "dodgy C-as-C++ style inheritance" + as described in comments at the top of pub_tool_hashtable.h, are + met. Blargh. */ + tl_assert( sizeof(void*) == sizeof(struct _MallocMeta*) ); + tl_assert( sizeof(UWord) == sizeof(Addr) ); + hg_mallocmeta_table + = VG_(HT_construct)( "hg_malloc_metadata_table" ); + +} + +VG_DETERMINE_INTERFACE_VERSION(hg_pre_clo_init) + +/*--------------------------------------------------------------------*/ +/*--- end hg_main.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/hg_wordset.c.svn-base b/helgrind/.svn/text-base/hg_wordset.c.svn-base new file mode 100644 index 0000000..5d33004 --- /dev/null +++ b/helgrind/.svn/text-base/hg_wordset.c.svn-base @@ -0,0 +1,860 @@ + +/*--------------------------------------------------------------------*/ +/*--- Sets of words, with unique set identifiers. ---*/ +/*--- hg_wordset.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. 
Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#include "pub_tool_basics.h" +#include "pub_tool_libcassert.h" +#include "pub_tool_libcbase.h" +#include "pub_tool_libcprint.h" +#include "pub_tool_threadstate.h" +#include "pub_tool_wordfm.h" + +#include "hg_basics.h" +#include "hg_wordset.h" /* self */ + +//------------------------------------------------------------------// +//--- Word Cache ---// +//------------------------------------------------------------------// + +typedef + struct { UWord arg1; UWord arg2; UWord res; } + WCacheEnt; + +/* Each cache is a fixed sized array of N_WCACHE_STAT_MAX entries. + However only the first .dynMax are used. This is because at some + point, expanding the cache further overall gives a slowdown because + searching more entries more than negates any performance advantage + from caching those entries in the first place. Hence use .dynMax + to allow the size of the cache(s) to be set differently for each + different WordSetU. */ +#define N_WCACHE_STAT_MAX 32 +typedef + struct { + WCacheEnt ent[N_WCACHE_STAT_MAX]; + UWord dynMax; /* 1 .. N_WCACHE_STAT_MAX inclusive */ + UWord inUse; /* 0 .. dynMax inclusive */ + } + WCache; + +#define WCache_INIT(_zzcache,_zzdynmax) \ + do { \ + tl_assert((_zzdynmax) >= 1); \ + tl_assert((_zzdynmax) <= N_WCACHE_STAT_MAX); \ + (_zzcache).dynMax = (_zzdynmax); \ + (_zzcache).inUse = 0; \ + } while (0) + +#define WCache_LOOKUP_AND_RETURN(_retty,_zzcache,_zzarg1,_zzarg2) \ + do { \ + UWord _i; \ + UWord _arg1 = (UWord)(_zzarg1); \ + UWord _arg2 = (UWord)(_zzarg2); \ + WCache* _cache = &(_zzcache); \ + tl_assert(_cache->dynMax >= 1); \ + tl_assert(_cache->dynMax <= N_WCACHE_STAT_MAX); \ + tl_assert(_cache->inUse >= 0); \ + tl_assert(_cache->inUse <= _cache->dynMax); \ + if (_cache->inUse > 0) { \ + if (_cache->ent[0].arg1 == _arg1 \ + && _cache->ent[0].arg2 == _arg2) \ + return (_retty)_cache->ent[0].res; \ + for (_i = 1; _i < _cache->inUse; _i++) { \ + if (_cache->ent[_i].arg1 == _arg1 \ + && _cache->ent[_i].arg2 == _arg2) { \ + WCacheEnt tmp = _cache->ent[_i-1]; \ + _cache->ent[_i-1] = _cache->ent[_i]; \ + _cache->ent[_i] = tmp; \ + return (_retty)_cache->ent[_i-1].res; \ + } \ + } \ + } \ + } while (0) + +#define WCache_UPDATE(_zzcache,_zzarg1,_zzarg2,_zzresult) \ + do { \ + Word _i; \ + UWord _arg1 = (UWord)(_zzarg1); \ + UWord _arg2 = (UWord)(_zzarg2); \ + UWord _res = (UWord)(_zzresult); \ + WCache* _cache = &(_zzcache); \ + tl_assert(_cache->dynMax >= 1); \ + tl_assert(_cache->dynMax <= N_WCACHE_STAT_MAX); \ + tl_assert(_cache->inUse >= 0); \ + tl_assert(_cache->inUse <= _cache->dynMax); \ + if (_cache->inUse < _cache->dynMax) \ + _cache->inUse++; \ + for (_i = _cache->inUse-1; _i >= 1; _i--) \ + _cache->ent[_i] = _cache->ent[_i-1]; \ + _cache->ent[0].arg1 = _arg1; \ + _cache->ent[0].arg2 = _arg2; \ + _cache->ent[0].res = _res; \ + } while (0) + + +//------------------------------------------------------------------// +//--- WordSet ---// +//--- Implementation ---// +//------------------------------------------------------------------// + +typedef + struct { + WordSetU* owner; /* for sanity checking */ + UWord* words; + UWord size; /* Really this should be SizeT */ + } + WordVec; + +/* ix2vec[0 .. ix2vec_used-1] are pointers to the lock sets (WordVecs) + really. vec2ix is the inverse mapping, mapping WordVec* to the + corresponding ix2vec entry number. 
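(Aside: this index-both-ways layout is a standard interning scheme — a content-keyed map for set-to-index lookup plus a flat array for index-to-set lookup. A minimal standalone sketch, with a linear scan standing in for the content-keyed WordFM and fixed-size storage in place of the Valgrind allocators:)

    #include <stdio.h>
    #include <string.h>

    /* Sketch only: an interned "set" is a small index into a table;
       identical contents always yield the same index. */
    typedef struct { unsigned n; unsigned long w[4]; } Vec;  /* sorted words */

    static Vec      ix2vec_demo[100];            /* index -> content */
    static unsigned n_sets_demo = 0;

    static unsigned intern ( const Vec* v ) {
       for (unsigned i = 0; i < n_sets_demo; i++)
          if (ix2vec_demo[i].n == v->n
              && 0 == memcmp(ix2vec_demo[i].w, v->w, v->n * sizeof v->w[0]))
             return i;                           /* already present */
       ix2vec_demo[n_sets_demo] = *v;            /* add new entry    */
       return n_sets_demo++;
    }

    int main ( void ) {
       Vec a = { 2, { 3, 7 } }, b = { 2, { 3, 7 } }, c = { 1, { 5 } };
       printf("%u %u %u\n", intern(&a), intern(&b), intern(&c));  /* 0 0 1 */
       return 0;
    }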
The two mappings are mutually + redundant. */ +struct _WordSetU { + void* (*alloc)(HChar*,SizeT); + HChar* cc; + void (*dealloc)(void*); + WordFM* vec2ix; /* WordVec-to-WordSet mapping tree */ + WordVec** ix2vec; /* WordSet-to-WordVec mapping array */ + UWord ix2vec_size; + UWord ix2vec_used; + WordSet empty; /* cached, for speed */ + /* Caches for some operations */ + WCache cache_addTo; + WCache cache_delFrom; + WCache cache_intersect; + WCache cache_minus; + /* Stats */ + UWord n_add; + UWord n_add_uncached; + UWord n_del; + UWord n_del_uncached; + UWord n_union; + UWord n_intersect; + UWord n_intersect_uncached; + UWord n_minus; + UWord n_minus_uncached; + UWord n_elem; + UWord n_doubleton; + UWord n_isEmpty; + UWord n_isSingleton; + UWord n_anyElementOf; + UWord n_isSubsetOf; + }; + +/* Create a new WordVec of the given size. */ + +static WordVec* new_WV_of_size ( WordSetU* wsu, UWord sz ) +{ + WordVec* wv; + tl_assert(sz >= 0); + wv = wsu->alloc( wsu->cc, sizeof(WordVec) ); + wv->owner = wsu; + wv->words = NULL; + wv->size = sz; + if (sz > 0) { + wv->words = wsu->alloc( wsu->cc, (SizeT)sz * sizeof(UWord) ); + } + return wv; +} + +static void delete_WV ( WordVec* wv ) +{ + void (*dealloc)(void*) = wv->owner->dealloc; + if (wv->words) { + dealloc(wv->words); + } + dealloc(wv); +} +static void delete_WV_for_FM ( UWord wv ) { + delete_WV( (WordVec*)wv ); +} + +static Word cmp_WordVecs_for_FM ( UWord wv1W, UWord wv2W ) +{ + UWord i; + WordVec* wv1 = (WordVec*)wv1W; + WordVec* wv2 = (WordVec*)wv2W; + UWord common = wv1->size < wv2->size ? wv1->size : wv2->size; + for (i = 0; i < common; i++) { + if (wv1->words[i] == wv2->words[i]) + continue; + if (wv1->words[i] < wv2->words[i]) + return -1; + if (wv1->words[i] > wv2->words[i]) + return 1; + tl_assert(0); + } + /* Ok, the common sections are identical. So now consider the + tails. Both sets are considered to finish in an implied + sequence of -infinity. */ + if (wv1->size < wv2->size) { + tl_assert(common == wv1->size); + return -1; /* impliedly, wv1 contains some -infinitys in places + where wv2 doesn't. */ + } + if (wv1->size > wv2->size) { + tl_assert(common == wv2->size); + return 1; + } + tl_assert(common == wv1->size); + return 0; /* identical */ +} + +static void ensure_ix2vec_space ( WordSetU* wsu ) +{ + UInt i, new_sz; + WordVec** new_vec; + tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size); + if (wsu->ix2vec_used < wsu->ix2vec_size) + return; + new_sz = 2 * wsu->ix2vec_size; + if (new_sz == 0) new_sz = 2; + new_vec = wsu->alloc( wsu->cc, new_sz * sizeof(WordVec*) ); + tl_assert(new_vec); + for (i = 0; i < wsu->ix2vec_size; i++) + new_vec[i] = wsu->ix2vec[i]; + if (wsu->ix2vec) + wsu->dealloc(wsu->ix2vec); + wsu->ix2vec = new_vec; + wsu->ix2vec_size = new_sz; +} + +/* Index into a WordSetU, doing the obvious range check. Failure of + the assertions marked XXX and YYY is an indication of passing the + wrong WordSetU* in the public API of this module. */ +static WordVec* do_ix2vec ( WordSetU* wsu, WordSet ws ) +{ + WordVec* wv; + tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size); + if (wsu->ix2vec_used > 0) + tl_assert(wsu->ix2vec); + /* If this assertion fails, it may mean you supplied a 'ws' + that does not come from the 'wsu' universe. */ + tl_assert(ws < wsu->ix2vec_used); /* XXX */ + wv = wsu->ix2vec[ws]; + /* Make absolutely sure that 'ws' is a member of 'wsu'. */ + tl_assert(wv); + tl_assert(wv->owner == wsu); /* YYY */ + return wv; +} + +/* See if wv is contained within wsu. 
If so, deallocate wv and return + the index of the already-present copy. If not, add wv to both the + vec2ix and ix2vec mappings and return its index. +*/ +static WordSet add_or_dealloc_WordVec( WordSetU* wsu, WordVec* wv_new ) +{ + Bool have; + WordVec* wv_old; + UWord/*Set*/ ix_old = -1; + /* Really WordSet, but need something that can safely be casted to + a Word* in the lookupFM. Making it WordSet (which is 32 bits) + causes failures on a 64-bit platform. */ + tl_assert(wv_new->owner == wsu); + have = VG_(lookupFM)( wsu->vec2ix, + (Word*)&wv_old, (Word*)&ix_old, + (Word)wv_new ); + if (have) { + tl_assert(wv_old != wv_new); + tl_assert(wv_old); + tl_assert(wv_old->owner == wsu); + tl_assert(ix_old < wsu->ix2vec_used); + tl_assert(wsu->ix2vec[ix_old] == wv_old); + delete_WV( wv_new ); + return (WordSet)ix_old; + } else { + ensure_ix2vec_space( wsu ); + tl_assert(wsu->ix2vec); + tl_assert(wsu->ix2vec_used < wsu->ix2vec_size); + wsu->ix2vec[wsu->ix2vec_used] = wv_new; + VG_(addToFM)( wsu->vec2ix, (Word)wv_new, (Word)wsu->ix2vec_used ); + if (0) VG_(printf)("aodW %d\n", (Int)wsu->ix2vec_used ); + wsu->ix2vec_used++; + tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size); + return (WordSet)(wsu->ix2vec_used - 1); + } +} + + +WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ), + HChar* cc, + void (*dealloc)(void*), + Word cacheSize ) +{ + WordSetU* wsu; + WordVec* empty; + + wsu = alloc_nofail( cc, sizeof(WordSetU) ); + VG_(memset)( wsu, 0, sizeof(WordSetU) ); + wsu->alloc = alloc_nofail; + wsu->cc = cc; + wsu->dealloc = dealloc; + wsu->vec2ix = VG_(newFM)( alloc_nofail, cc, + dealloc, cmp_WordVecs_for_FM ); + wsu->ix2vec_used = 0; + wsu->ix2vec_size = 0; + wsu->ix2vec = NULL; + WCache_INIT(wsu->cache_addTo, cacheSize); + WCache_INIT(wsu->cache_delFrom, cacheSize); + WCache_INIT(wsu->cache_intersect, cacheSize); + WCache_INIT(wsu->cache_minus, cacheSize); + empty = new_WV_of_size( wsu, 0 ); + wsu->empty = add_or_dealloc_WordVec( wsu, empty ); + + return wsu; +} + +void HG_(deleteWordSetU) ( WordSetU* wsu ) +{ + void (*dealloc)(void*) = wsu->dealloc; + tl_assert(wsu->vec2ix); + VG_(deleteFM)( wsu->vec2ix, delete_WV_for_FM, NULL/*val-finalizer*/ ); + if (wsu->ix2vec) + dealloc(wsu->ix2vec); + dealloc(wsu); +} + +WordSet HG_(emptyWS) ( WordSetU* wsu ) +{ + return wsu->empty; +} + +Bool HG_(isEmptyWS) ( WordSetU* wsu, WordSet ws ) +{ + WordVec* wv = do_ix2vec( wsu, ws ); + wsu->n_isEmpty++; + if (wv->size == 0) { + tl_assert(ws == wsu->empty); + return True; + } else { + tl_assert(ws != wsu->empty); + return False; + } +} + +Bool HG_(isSingletonWS) ( WordSetU* wsu, WordSet ws, UWord w ) +{ + WordVec* wv; + tl_assert(wsu); + wsu->n_isSingleton++; + wv = do_ix2vec( wsu, ws ); + return (Bool)(wv->size == 1 && wv->words[0] == w); +} + +UWord HG_(cardinalityWS) ( WordSetU* wsu, WordSet ws ) +{ + WordVec* wv; + tl_assert(wsu); + wv = do_ix2vec( wsu, ws ); + tl_assert(wv->size >= 0); + return wv->size; +} + +UWord HG_(anyElementOfWS) ( WordSetU* wsu, WordSet ws ) +{ + WordVec* wv; + tl_assert(wsu); + wsu->n_anyElementOf++; + wv = do_ix2vec( wsu, ws ); + tl_assert(wv->size >= 1); + return wv->words[0]; +} + +UWord HG_(cardinalityWSU) ( WordSetU* wsu ) +{ + tl_assert(wsu); + return wsu->ix2vec_used; +} + +void HG_(getPayloadWS) ( /*OUT*/UWord** words, /*OUT*/UWord* nWords, + WordSetU* wsu, WordSet ws ) +{ + WordVec* wv; + tl_assert(wsu); + wv = do_ix2vec( wsu, ws ); + tl_assert(wv->size >= 0); + *nWords = wv->size; + *words = wv->words; +} + +Bool HG_(plausibleWS) ( WordSetU* wsu, WordSet ws 
) +{ + if (wsu == NULL) return False; + if (ws < 0 || ws >= wsu->ix2vec_used) + return False; + return True; +} + +Bool HG_(saneWS_SLOW) ( WordSetU* wsu, WordSet ws ) +{ + WordVec* wv; + UWord i; + if (wsu == NULL) return False; + if (ws < 0 || ws >= wsu->ix2vec_used) + return False; + wv = do_ix2vec( wsu, ws ); + /* can never happen .. do_ix2vec will assert instead. Oh well. */ + if (wv->owner != wsu) return False; + if (wv->size < 0) return False; + if (wv->size > 0) { + for (i = 0; i < wv->size-1; i++) { + if (wv->words[i] >= wv->words[i+1]) + return False; + } + } + return True; +} + +Bool HG_(elemWS) ( WordSetU* wsu, WordSet ws, UWord w ) +{ + UWord i; + WordVec* wv = do_ix2vec( wsu, ws ); + wsu->n_elem++; + for (i = 0; i < wv->size; i++) { + if (wv->words[i] == w) + return True; + } + return False; +} + +WordSet HG_(doubletonWS) ( WordSetU* wsu, UWord w1, UWord w2 ) +{ + WordVec* wv; + wsu->n_doubleton++; + if (w1 == w2) { + wv = new_WV_of_size(wsu, 1); + wv->words[0] = w1; + } + else if (w1 < w2) { + wv = new_WV_of_size(wsu, 2); + wv->words[0] = w1; + wv->words[1] = w2; + } + else { + tl_assert(w1 > w2); + wv = new_WV_of_size(wsu, 2); + wv->words[0] = w2; + wv->words[1] = w1; + } + return add_or_dealloc_WordVec( wsu, wv ); +} + +WordSet HG_(singletonWS) ( WordSetU* wsu, UWord w ) +{ + return HG_(doubletonWS)( wsu, w, w ); +} + +WordSet HG_(isSubsetOf) ( WordSetU* wsu, WordSet small, WordSet big ) +{ + wsu->n_isSubsetOf++; + return small == HG_(intersectWS)( wsu, small, big ); +} + +void HG_(ppWS) ( WordSetU* wsu, WordSet ws ) +{ + UWord i; + WordVec* wv; + tl_assert(wsu); + wv = do_ix2vec( wsu, ws ); + VG_(printf)("{"); + for (i = 0; i < wv->size; i++) { + VG_(printf)("%p", (void*)wv->words[i]); + if (i < wv->size-1) + VG_(printf)(","); + } + VG_(printf)("}"); +} + +void HG_(ppWSUstats) ( WordSetU* wsu, HChar* name ) +{ + VG_(printf)(" WordSet \"%s\":\n", name); + VG_(printf)(" addTo %10lu (%lu uncached)\n", + wsu->n_add, wsu->n_add_uncached); + VG_(printf)(" delFrom %10lu (%lu uncached)\n", + wsu->n_del, wsu->n_del_uncached); + VG_(printf)(" union %10lu\n", wsu->n_union); + VG_(printf)(" intersect %10lu (%lu uncached) " + "[nb. incl isSubsetOf]\n", + wsu->n_intersect, wsu->n_intersect_uncached); + VG_(printf)(" minus %10lu (%lu uncached)\n", + wsu->n_minus, wsu->n_minus_uncached); + VG_(printf)(" elem %10lu\n", wsu->n_elem); + VG_(printf)(" doubleton %10lu\n", wsu->n_doubleton); + VG_(printf)(" isEmpty %10lu\n", wsu->n_isEmpty); + VG_(printf)(" isSingleton %10lu\n", wsu->n_isSingleton); + VG_(printf)(" anyElementOf %10lu\n", wsu->n_anyElementOf); + VG_(printf)(" isSubsetOf %10lu\n", wsu->n_isSubsetOf); +} + +WordSet HG_(addToWS) ( WordSetU* wsu, WordSet ws, UWord w ) +{ + UWord k, j; + WordVec* wv_new; + WordVec* wv; + WordSet result = (WordSet)(-1); /* bogus */ + + wsu->n_add++; + WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_addTo, ws, w); + wsu->n_add_uncached++; + + /* If already present, this is a no-op. */ + wv = do_ix2vec( wsu, ws ); + for (k = 0; k < wv->size; k++) { + if (wv->words[k] == w) { + result = ws; + goto out; + } + } + /* Ok, not present. Build a new one ... */ + wv_new = new_WV_of_size( wsu, wv->size + 1 ); + k = j = 0; + for (; k < wv->size && wv->words[k] < w; k++) { + wv_new->words[j++] = wv->words[k]; + } + wv_new->words[j++] = w; + for (; k < wv->size; k++) { + tl_assert(wv->words[k] > w); + wv_new->words[j++] = wv->words[k]; + } + tl_assert(j == wv_new->size); + + /* Find any existing copy, or add the new one. 
*/ + result = add_or_dealloc_WordVec( wsu, wv_new ); + tl_assert(result != (WordSet)(-1)); + + out: + WCache_UPDATE(wsu->cache_addTo, ws, w, result); + return result; +} + +WordSet HG_(delFromWS) ( WordSetU* wsu, WordSet ws, UWord w ) +{ + UWord i, j, k; + WordVec* wv_new; + WordSet result = (WordSet)(-1); /* bogus */ + WordVec* wv = do_ix2vec( wsu, ws ); + + wsu->n_del++; + + /* special case empty set */ + if (wv->size == 0) { + tl_assert(ws == wsu->empty); + return ws; + } + + WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_delFrom, ws, w); + wsu->n_del_uncached++; + + /* If not already present, this is a no-op. */ + for (i = 0; i < wv->size; i++) { + if (wv->words[i] == w) + break; + } + if (i == wv->size) { + result = ws; + goto out; + } + /* So w is present in ws, and the new set will be one element + smaller. */ + tl_assert(i >= 0 && i < wv->size); + tl_assert(wv->size > 0); + + wv_new = new_WV_of_size( wsu, wv->size - 1 ); + j = k = 0; + for (; j < wv->size; j++) { + if (j == i) + continue; + wv_new->words[k++] = wv->words[j]; + } + tl_assert(k == wv_new->size); + + result = add_or_dealloc_WordVec( wsu, wv_new ); + if (wv->size == 1) { + tl_assert(result == wsu->empty); + } + + out: + WCache_UPDATE(wsu->cache_delFrom, ws, w, result); + return result; +} + +WordSet HG_(unionWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 ) +{ + UWord i1, i2, k, sz; + WordVec* wv_new; + WordVec* wv1 = do_ix2vec( wsu, ws1 ); + WordVec* wv2 = do_ix2vec( wsu, ws2 ); + wsu->n_union++; + sz = 0; + i1 = i2 = 0; + while (1) { + if (i1 >= wv1->size || i2 >= wv2->size) + break; + sz++; + if (wv1->words[i1] < wv2->words[i2]) { + i1++; + } else + if (wv1->words[i1] > wv2->words[i2]) { + i2++; + } else { + i1++; + i2++; + } + } + tl_assert(i1 <= wv1->size); + tl_assert(i2 <= wv2->size); + tl_assert(i1 == wv1->size || i2 == wv2->size); + if (i1 == wv1->size && i2 < wv2->size) { + sz += (wv2->size - i2); + } + if (i2 == wv2->size && i1 < wv1->size) { + sz += (wv1->size - i1); + } + + wv_new = new_WV_of_size( wsu, sz ); + k = 0; + + i1 = i2 = 0; + while (1) { + if (i1 >= wv1->size || i2 >= wv2->size) + break; + if (wv1->words[i1] < wv2->words[i2]) { + wv_new->words[k++] = wv1->words[i1]; + i1++; + } else + if (wv1->words[i1] > wv2->words[i2]) { + wv_new->words[k++] = wv2->words[i2]; + i2++; + } else { + wv_new->words[k++] = wv1->words[i1]; + i1++; + i2++; + } + } + tl_assert(i1 <= wv1->size); + tl_assert(i2 <= wv2->size); + tl_assert(i1 == wv1->size || i2 == wv2->size); + if (i1 == wv1->size && i2 < wv2->size) { + while (i2 < wv2->size) + wv_new->words[k++] = wv2->words[i2++]; + } + if (i2 == wv2->size && i1 < wv1->size) { + while (i1 < wv1->size) + wv_new->words[k++] = wv1->words[i1++]; + } + + tl_assert(k == sz); + + return add_or_dealloc_WordVec( wsu, wv_new ); +} + +WordSet HG_(intersectWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 ) +{ + UWord i1, i2, k, sz; + WordSet ws_new = (WordSet)(-1); /* bogus */ + WordVec* wv_new; + WordVec* wv1; + WordVec* wv2; + + wsu->n_intersect++; + + /* Deal with an obvious case fast. */ + if (ws1 == ws2) + return ws1; + + /* Since intersect(x,y) == intersect(y,x), convert both variants to + the same query. This reduces the number of variants the cache + has to deal with. 
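The same argument-swapping idea applies to any commutative memoised operation: put (x,y) into a canonical order before consulting the cache, so (a,b) and (b,a) share one entry and one computation. A standalone illustration with hypothetical names:

    #include <stdio.h>

    /* Tiny one-entry memo for a commutative function f(x,y) = x*y.
       Arguments are put in canonical order first, so a query for
       (7,3) hits the entry created by (3,7). */
    static unsigned cache_x, cache_y, cache_res;
    static int      cache_full = 0;

    static unsigned f_cached ( unsigned x, unsigned y ) {
       if (x > y) { unsigned t = x; x = y; y = t; }   /* canonicalise */
       if (cache_full && cache_x == x && cache_y == y) {
          printf("hit  (%u,%u)\n", x, y);
          return cache_res;
       }
       printf("miss (%u,%u)\n", x, y);
       cache_x = x; cache_y = y; cache_res = x * y; cache_full = 1;
       return cache_res;
    }

    int main ( void ) {
       f_cached(3, 7);   /* miss */
       f_cached(7, 3);   /* hit, thanks to canonicalisation */
       return 0;
    }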
*/ + if (ws1 > ws2) { + WordSet wst = ws1; ws1 = ws2; ws2 = wst; + } + + WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_intersect, ws1, ws2); + wsu->n_intersect_uncached++; + + wv1 = do_ix2vec( wsu, ws1 ); + wv2 = do_ix2vec( wsu, ws2 ); + sz = 0; + i1 = i2 = 0; + while (1) { + if (i1 >= wv1->size || i2 >= wv2->size) + break; + if (wv1->words[i1] < wv2->words[i2]) { + i1++; + } else + if (wv1->words[i1] > wv2->words[i2]) { + i2++; + } else { + sz++; + i1++; + i2++; + } + } + tl_assert(i1 <= wv1->size); + tl_assert(i2 <= wv2->size); + tl_assert(i1 == wv1->size || i2 == wv2->size); + + wv_new = new_WV_of_size( wsu, sz ); + k = 0; + + i1 = i2 = 0; + while (1) { + if (i1 >= wv1->size || i2 >= wv2->size) + break; + if (wv1->words[i1] < wv2->words[i2]) { + i1++; + } else + if (wv1->words[i1] > wv2->words[i2]) { + i2++; + } else { + wv_new->words[k++] = wv1->words[i1]; + i1++; + i2++; + } + } + tl_assert(i1 <= wv1->size); + tl_assert(i2 <= wv2->size); + tl_assert(i1 == wv1->size || i2 == wv2->size); + + tl_assert(k == sz); + + ws_new = add_or_dealloc_WordVec( wsu, wv_new ); + if (sz == 0) { + tl_assert(ws_new == wsu->empty); + } + + tl_assert(ws_new != (WordSet)(-1)); + WCache_UPDATE(wsu->cache_intersect, ws1, ws2, ws_new); + + return ws_new; +} + +WordSet HG_(minusWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 ) +{ + UWord i1, i2, k, sz; + WordSet ws_new = (WordSet)(-1); /* bogus */ + WordVec* wv_new; + WordVec* wv1; + WordVec* wv2; + + wsu->n_minus++; + WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_minus, ws1, ws2); + wsu->n_minus_uncached++; + + wv1 = do_ix2vec( wsu, ws1 ); + wv2 = do_ix2vec( wsu, ws2 ); + sz = 0; + i1 = i2 = 0; + while (1) { + if (i1 >= wv1->size || i2 >= wv2->size) + break; + if (wv1->words[i1] < wv2->words[i2]) { + sz++; + i1++; + } else + if (wv1->words[i1] > wv2->words[i2]) { + i2++; + } else { + i1++; + i2++; + } + } + tl_assert(i1 <= wv1->size); + tl_assert(i2 <= wv2->size); + tl_assert(i1 == wv1->size || i2 == wv2->size); + if (i2 == wv2->size && i1 < wv1->size) { + sz += (wv1->size - i1); + } + + wv_new = new_WV_of_size( wsu, sz ); + k = 0; + + i1 = i2 = 0; + while (1) { + if (i1 >= wv1->size || i2 >= wv2->size) + break; + if (wv1->words[i1] < wv2->words[i2]) { + wv_new->words[k++] = wv1->words[i1]; + i1++; + } else + if (wv1->words[i1] > wv2->words[i2]) { + i2++; + } else { + i1++; + i2++; + } + } + tl_assert(i1 <= wv1->size); + tl_assert(i2 <= wv2->size); + tl_assert(i1 == wv1->size || i2 == wv2->size); + if (i2 == wv2->size && i1 < wv1->size) { + while (i1 < wv1->size) + wv_new->words[k++] = wv1->words[i1++]; + } + + tl_assert(k == sz); + + ws_new = add_or_dealloc_WordVec( wsu, wv_new ); + if (sz == 0) { + tl_assert(ws_new == wsu->empty); + } + + tl_assert(ws_new != (WordSet)(-1)); + WCache_UPDATE(wsu->cache_minus, ws1, ws2, ws_new); + + return ws_new; +} + +static __attribute__((unused)) +void show_WS ( WordSetU* wsu, WordSet ws ) +{ + UWord i; + WordVec* wv = do_ix2vec( wsu, ws ); + VG_(printf)("#%u{", ws); + for (i = 0; i < wv->size; i++) { + VG_(printf)("%lu", wv->words[i]); + if (i < wv->size-1) + VG_(printf)(","); + } + VG_(printf)("}\n"); +} + +//------------------------------------------------------------------// +//--- end WordSet ---// +//--- Implementation ---// +//------------------------------------------------------------------// + +/*--------------------------------------------------------------------*/ +/*--- end hg_wordset.c ---*/ +/*--------------------------------------------------------------------*/ diff --git 
a/helgrind/.svn/text-base/hg_wordset.h.svn-base b/helgrind/.svn/text-base/hg_wordset.h.svn-base new file mode 100644 index 0000000..1d1a72f --- /dev/null +++ b/helgrind/.svn/text-base/hg_wordset.h.svn-base @@ -0,0 +1,101 @@ + +/*--------------------------------------------------------------------*/ +/*--- Sets of words, with unique set identifiers. ---*/ +/*--- hg_wordset.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2009 OpenWorks LLP + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +#ifndef __HG_WORDSET_H +#define __HG_WORDSET_H + +//------------------------------------------------------------------// +//--- WordSet ---// +//--- Public Interface ---// +//------------------------------------------------------------------// + +typedef struct _WordSetU WordSetU; /* opaque */ + +typedef UInt WordSet; /* opaque, small int index */ + +/* Allocate and initialise a WordSetU */ +WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ), + HChar* cc, + void (*dealloc)(void*), + Word cacheSize ); + +/* Free up the WordSetU. */ +void HG_(deleteWordSetU) ( WordSetU* ); + +/* Get the number of elements in this WordSetU. */ +UWord HG_(cardinalityWSU) ( WordSetU* ); + +/* Show performance stats for this WordSetU. */ +void HG_(ppWSUstats) ( WordSetU* wsu, HChar* name ); + + +/* Element-level operations on WordSets. Note that the WordSet + numbers given out are 0, 1, 2, 3, etc, and as it happens 0 always + represents the empty set. 
*/ + +WordSet HG_(emptyWS) ( WordSetU* ); +WordSet HG_(addToWS) ( WordSetU*, WordSet, UWord ); +WordSet HG_(delFromWS) ( WordSetU*, WordSet, UWord ); +WordSet HG_(unionWS) ( WordSetU*, WordSet, WordSet ); +WordSet HG_(intersectWS) ( WordSetU*, WordSet, WordSet ); +WordSet HG_(minusWS) ( WordSetU*, WordSet, WordSet ); +Bool HG_(isEmptyWS) ( WordSetU*, WordSet ); +Bool HG_(isSingletonWS) ( WordSetU*, WordSet, UWord ); +UWord HG_(anyElementOfWS) ( WordSetU*, WordSet ); +UWord HG_(cardinalityWS) ( WordSetU*, WordSet ); +Bool HG_(elemWS) ( WordSetU*, WordSet, UWord ); +WordSet HG_(doubletonWS) ( WordSetU*, UWord, UWord ); +WordSet HG_(singletonWS) ( WordSetU*, UWord ); +WordSet HG_(isSubsetOf) ( WordSetU*, WordSet, WordSet ); + +Bool HG_(plausibleWS) ( WordSetU*, WordSet ); +Bool HG_(saneWS_SLOW) ( WordSetU*, WordSet ); + +void HG_(ppWS) ( WordSetU*, WordSet ); +void HG_(getPayloadWS) ( /*OUT*/UWord** words, /*OUT*/UWord* nWords, + WordSetU*, WordSet ); + + +//------------------------------------------------------------------// +//--- end WordSet ---// +//--- Public Interface ---// +//------------------------------------------------------------------// + +#endif /* ! __HG_WORDSET_H */ + +/*--------------------------------------------------------------------*/ +/*--- end hg_wordset.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/libhb.h.svn-base b/helgrind/.svn/text-base/libhb.h.svn-base new file mode 100644 index 0000000..b204883 --- /dev/null +++ b/helgrind/.svn/text-base/libhb.h.svn-base @@ -0,0 +1,156 @@ + +/*--------------------------------------------------------------------*/ +/*--- LibHB: a library for implementing and checking ---*/ +/*--- the happens-before relationship in concurrent programs. ---*/ +/*--- libhb_main.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of LibHB, a library for implementing and checking + the happens-before relationship in concurrent programs. + + Copyright (C) 2008-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __LIBHB_H +#define __LIBHB_H + +/* Abstract to user: thread identifiers */ +/* typedef struct _Thr Thr; */ /* now in hg_lock_n_thread.h */ + +/* Abstract to user: synchronisation objects */ +/* typedef struct _SO SO; */ /* now in hg_lock_n_thread.h */ + +/* Initialise library; returns Thr* for root thread. 'shadow_alloc' + should never return NULL, instead it should simply not return if + they encounter an out-of-memory condition. */ +Thr* libhb_init ( + void (*get_stacktrace)( Thr*, Addr*, UWord ), + ExeContext* (*get_EC)( Thr* ) + ); + +/* Shut down the library, and print stats (in fact that's _all_ + this is for.) 
*/ +void libhb_shutdown ( Bool show_stats ); + +/* Thread creation: returns Thr* for new thread */ +Thr* libhb_create ( Thr* parent ); + +/* Thread async exit */ +void libhb_async_exit ( Thr* exitter ); + +/* Synchronisation objects (abstract to caller) */ + +/* Allocate a new one (alloc'd by library) */ +SO* libhb_so_alloc ( void ); + +/* Dealloc one */ +void libhb_so_dealloc ( SO* so ); + +/* Send a message via a sync object. If strong_send is true, the + resulting inter-thread dependency seen by a future receiver of this + message will be a dependency on this thread only. That is, in a + strong send, the VC inside the SO is replaced by the clock of the + sending thread. For a weak send, the sender's VC is joined into + that already in the SO, if any. This subtlety is needed to model + rwlocks: a strong send corresponds to releasing a rwlock that had + been w-held (or releasing a standard mutex). A weak send + corresponds to releasing a rwlock that has been r-held. + + (rationale): Since in general many threads may hold a rwlock in + r-mode, a weak send facility is necessary in order that the final + SO reflects the join of the VCs of all the threads releasing the + rwlock, rather than merely holding the VC of the most recent thread + to release it. */ +void libhb_so_send ( Thr* thr, SO* so, Bool strong_send ); + +/* Recv a message from a sync object. If strong_recv is True, the + resulting inter-thread dependency is considered adequate to induce + a h-b ordering on both reads and writes. If it is False, the + implied h-b ordering exists only for reads, not writes. This is + subtlety is required in order to support reader-writer locks: a + thread doing a write-acquire of a rwlock (or acquiring a normal + mutex) models this by doing a strong receive. A thread doing a + read-acquire of a rwlock models this by doing a !strong_recv. */ +void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv ); + +/* Has this SO ever been sent on? */ +Bool libhb_so_everSent ( SO* so ); + +/* Memory accesses (1/2/4/8 byte size). They report a race if one is + found. */ +#define LIBHB_WRITE_1(_thr,_a) zsm_apply8___msm_write((_thr),(_a)) +#define LIBHB_WRITE_2(_thr,_a) zsm_apply16___msm_write((_thr),(_a)) +#define LIBHB_WRITE_4(_thr,_a) zsm_apply32___msm_write((_thr),(_a)) +#define LIBHB_WRITE_8(_thr,_a) zsm_apply64___msm_write((_thr),(_a)) +#define LIBHB_WRITE_N(_thr,_a,_n) zsm_apply_range___msm_read((_thr),(_a),(_n)) + +#define LIBHB_READ_1(_thr,_a) zsm_apply8___msm_read((_thr),(_a)) +#define LIBHB_READ_2(_thr,_a) zsm_apply16___msm_read((_thr),(_a)) +#define LIBHB_READ_4(_thr,_a) zsm_apply32___msm_read((_thr),(_a)) +#define LIBHB_READ_8(_thr,_a) zsm_apply64___msm_read((_thr),(_a)) +#define LIBHB_READ_N(_thr,_a,_n) zsm_apply_range___msm_read((_thr),(_a),(_n)) + +void zsm_apply8___msm_write ( Thr* thr, Addr a ); +void zsm_apply16___msm_write ( Thr* thr, Addr a ); +void zsm_apply32___msm_write ( Thr* thr, Addr a ); +void zsm_apply64___msm_write ( Thr* thr, Addr a ); +void zsm_apply_range___msm_write ( Thr* thr, + Addr a, SizeT len ); + +void zsm_apply8___msm_read ( Thr* thr, Addr a ); +void zsm_apply16___msm_read ( Thr* thr, Addr a ); +void zsm_apply32___msm_read ( Thr* thr, Addr a ); +void zsm_apply64___msm_read ( Thr* thr, Addr a ); +void zsm_apply_range___msm_read ( Thr* thr, + Addr a, SizeT len ); + + +/* Set memory address ranges to new (freshly allocated), or noaccess + (no longer accessible). 
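To make the strong/weak send distinction above concrete: a strong send overwrites the sync object's vector clock with the sender's, while a weak send joins the sender's clock into it, so the SO accumulates the clocks of all r-mode releasers. A minimal standalone vector-clock sketch with a fixed thread count and hypothetical names (the real library's clocks are dynamically sized, and the strong_recv read/write subtlety is not modelled):

    #include <stdio.h>

    #define NTHR 4
    typedef struct { unsigned long t[NTHR]; } VC;   /* vector clock */

    static void vc_join ( VC* dst, const VC* src ) {      /* pointwise max */
       for (int i = 0; i < NTHR; i++)
          if (src->t[i] > dst->t[i]) dst->t[i] = src->t[i];
    }

    /* 'so' models the sync object's clock. */
    static void so_send ( VC* so, const VC* sender, int strong ) {
       if (strong) *so = *sender;        /* mutex unlock, rwlock w-release */
       else        vc_join(so, sender);  /* rwlock r-release: accumulate   */
    }

    static void so_recv ( VC* receiver, const VC* so ) {
       vc_join(receiver, so);            /* receiver happens-after senders */
    }

    int main ( void ) {
       VC so = {{0}}, thr1 = {{0}}, thr2 = {{0}}, thr3 = {{0}};
       thr1.t[1] = 5; thr2.t[2] = 9;
       so_send(&so, &thr1, 0);           /* two weak sends: both r-releases */
       so_send(&so, &thr2, 0);
       so_recv(&thr3, &so);              /* thr3 ordered after thr1 and thr2 */
       printf("thr3 = [%lu %lu %lu %lu]\n",
              thr3.t[0], thr3.t[1], thr3.t[2], thr3.t[3]);
       return 0;
    }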
*/ +void libhb_range_new ( Thr*, Addr, SizeT ); +void libhb_range_noaccess ( Thr*, Addr, SizeT ); + +/* For the convenience of callers, we offer to store one void* item in + a Thr, which we ignore, but the caller can get or set any time. */ +void* libhb_get_Thr_opaque ( Thr* ); +void libhb_set_Thr_opaque ( Thr*, void* ); + +/* Low level copy of shadow state from [src,src+len) to [dst,dst+len). + Overlapping moves are checked for and asserted against. */ +void libhb_copy_shadow_state ( Addr src, Addr dst, SizeT len ); + +/* Call this periodically to give libhb the opportunity to + garbage-collect its internal data structures. */ +void libhb_maybe_GC ( void ); + +/* Extract info from the conflicting-access machinery. */ +Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC, + /*OUT*/Thr** resThr, + /*OUT*/SizeT* resSzB, + /*OUT*/Bool* resIsW, + Thr* thr, Addr a, SizeT szB, Bool isW ); + +#endif /* __LIBHB_H */ + +/*--------------------------------------------------------------------*/ +/*--- end libhb.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/helgrind/.svn/text-base/libhb_core.c.svn-base b/helgrind/.svn/text-base/libhb_core.c.svn-base new file mode 100644 index 0000000..572b26b --- /dev/null +++ b/helgrind/.svn/text-base/libhb_core.c.svn-base @@ -0,0 +1,5011 @@ + +/*--------------------------------------------------------------------*/ +/*--- LibHB: a library for implementing and checking ---*/ +/*--- the happens-before relationship in concurrent programs. ---*/ +/*--- libhb_main.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of LibHB, a library for implementing and checking + the happens-before relationship in concurrent programs. + + Copyright (C) 2008-2009 OpenWorks Ltd + info@open-works.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "pub_tool_basics.h" +#include "pub_tool_libcassert.h" +#include "pub_tool_libcbase.h" +#include "pub_tool_libcprint.h" +#include "pub_tool_mallocfree.h" +#include "pub_tool_wordfm.h" +#include "pub_tool_sparsewa.h" +#include "pub_tool_xarray.h" +#include "pub_tool_oset.h" +#include "pub_tool_threadstate.h" +#include "pub_tool_aspacemgr.h" +#include "pub_tool_execontext.h" +#include "pub_tool_errormgr.h" +#include "pub_tool_options.h" // VG_(clo_verbosity) +#include "hg_basics.h" +#include "hg_wordset.h" +#include "hg_lock_n_thread.h" +#include "hg_errors.h" + +#include "libhb.h" + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// Debugging #defines // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + +/* Check the sanity of shadow values in the core memory state + machine. Change #if 0 to #if 1 to enable this. */ +#if 0 +# define CHECK_MSM 1 +#else +# define CHECK_MSM 0 +#endif + + +/* Check sanity (reference counts, etc) in the conflicting access + machinery. Change #if 0 to #if 1 to enable this. */ +#if 0 +# define CHECK_CEM 1 +#else +# define CHECK_CEM 0 +#endif + + +/* Check sanity in the compressed shadow memory machinery, + particularly in its caching innards. Unfortunately there's no + almost-zero-cost way to make them selectable at run time. Hence + set the #if 0 to #if 1 and rebuild if you want them. */ +#if 0 +# define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */ +# define inline __attribute__((noinline)) + /* probably want to ditch -fomit-frame-pointer too */ +#else +# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */ +#endif + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// Forward declarations // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + +/* fwds for + Globals needed by other parts of the library. These are set + once at startup and then never changed. */ +static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL; +static ExeContext* (*main_get_EC)( Thr* ) = NULL; + + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// SECTION BEGIN compressed shadow memory // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + +#ifndef __HB_ZSM_H +#define __HB_ZSM_H + +typedef ULong SVal; + +/* This value has special significance to the implementation, and callers + may not store it in the shadow memory. */ +#define SVal_INVALID (3ULL << 62) + +/* This is the default value for shadow memory. Initially the shadow + memory contains no accessible areas and so all reads produce this + value. TODO: make this caller-defineable. */ +#define SVal_NOACCESS (2ULL << 62) + +/* Initialise the library. Once initialised, it will (or may) call + rcinc and rcdec in response to all the calls below, in order to + allow the user to do reference counting on the SVals stored herein. + It is important to understand, however, that due to internal + caching, the reference counts are in general inaccurate, and can be + both above or below the true reference count for an item. 
In + particular, the library may indicate that the reference count for + an item is zero, when in fact it is not. + + To make the reference counting exact and therefore non-pointless, + call zsm_flush_cache. Immediately after it returns, the reference + counts for all items, as deduced by the caller by observing calls + to rcinc and rcdec, will be correct, and so any items with a zero + reference count may be freed (or at least considered to be + unreferenced by this library). +*/ +static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) ); + +static void zsm_set_range ( Addr, SizeT, SVal ); +static SVal zsm_read8 ( Addr ); +static void zsm_copy_range ( Addr, Addr, SizeT ); +static void zsm_flush_cache ( void ); + +#endif /* ! __HB_ZSM_H */ + + +/* Round a up to the next multiple of N. N must be a power of 2 */ +#define ROUNDUP(a, N) ((a + N - 1) & ~(N-1)) +/* Round a down to the next multiple of N. N must be a power of 2 */ +#define ROUNDDN(a, N) ((a) & ~(N-1)) + + + +/* ------ User-supplied RC functions ------ */ +static void(*rcinc)(SVal) = NULL; +static void(*rcdec)(SVal) = NULL; + + +/* ------ CacheLine ------ */ + +#define N_LINE_BITS 6 /* must be >= 3 */ +#define N_LINE_ARANGE (1 << N_LINE_BITS) +#define N_LINE_TREES (N_LINE_ARANGE >> 3) + +typedef + struct { + UShort descrs[N_LINE_TREES]; + SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8 + } + CacheLine; + +#define TREE_DESCR_16_0 (1<<0) +#define TREE_DESCR_32_0 (1<<1) +#define TREE_DESCR_16_1 (1<<2) +#define TREE_DESCR_64 (1<<3) +#define TREE_DESCR_16_2 (1<<4) +#define TREE_DESCR_32_1 (1<<5) +#define TREE_DESCR_16_3 (1<<6) +#define TREE_DESCR_8_0 (1<<7) +#define TREE_DESCR_8_1 (1<<8) +#define TREE_DESCR_8_2 (1<<9) +#define TREE_DESCR_8_3 (1<<10) +#define TREE_DESCR_8_4 (1<<11) +#define TREE_DESCR_8_5 (1<<12) +#define TREE_DESCR_8_6 (1<<13) +#define TREE_DESCR_8_7 (1<<14) +#define TREE_DESCR_DTY (1<<15) + +typedef + struct { + SVal dict[4]; /* can represent up to 4 diff values in the line */ + UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit + dict indexes */ + /* if dict[0] == SVal_INVALID then dict[1] is the index of the + LineF to use, and dict[2..] are also SVal_INVALID. */ + } + LineZ; /* compressed rep for a cache line */ + +typedef + struct { + Bool inUse; + SVal w64s[N_LINE_ARANGE]; + } + LineF; /* full rep for a cache line */ + +/* Shadow memory. + Primary map is a WordFM Addr SecMap*. + SecMaps cover some page-size-ish section of address space and hold + a compressed representation. + CacheLine-sized chunks of SecMaps are copied into a Cache, being + decompressed when moved into the cache and recompressed on the + way out. Because of this, the cache must operate as a writeback + cache, not a writethrough one. + + Each SecMap must hold a power-of-2 number of CacheLines. Hence + N_SECMAP_BITS must >= N_LINE_BITS. +*/ +#define N_SECMAP_BITS 13 +#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS) + +// # CacheLines held by a SecMap +#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE) + +/* The data in the SecMap is held in the array of LineZs. Each LineZ + either carries the required data directly, in a compressed + representation, or it holds (in .dict[0]) an index to the LineF in + .linesF that holds the full representation. + + Currently-unused LineF's have their .inUse bit set to zero. + Since each in-use LineF is referred to be exactly one LineZ, + the number of .linesZ[] that refer to .linesF should equal + the number of .linesF[] that have .inUse == True. 
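The Z/F split described above is a small dictionary-compression scheme: when a line's 64 shadow values contain at most 4 distinct SVals, the line can be stored as a 4-entry dictionary plus one 2-bit index per value (16 bytes of indexes); otherwise the full representation must be used. A standalone sketch of the encode step, with hypothetical names and no error handling:

    #include <stdio.h>

    #define LINE 64
    typedef unsigned long long SVal;

    /* Try to compress line[0..63] into dict[0..3] plus packed 2-bit
       indices.  Returns 1 on success, 0 if more than 4 distinct values
       occur (the caller would then fall back to a full-rep line). */
    static int compress_line ( const SVal* line, SVal dict[4],
                               unsigned char ix2s[LINE/4] )
    {
       int ndict = 0;
       for (int i = 0; i < LINE/4; i++) ix2s[i] = 0;
       for (int i = 0; i < LINE; i++) {
          int j;
          for (j = 0; j < ndict; j++)
             if (dict[j] == line[i]) break;
          if (j == ndict) {
             if (ndict == 4) return 0;         /* too many distinct values */
             dict[ndict++] = line[i];
          }
          ix2s[i/4] |= (unsigned char)(j << (2 * (i % 4)));  /* pack 2 bits */
       }
       return 1;
    }

    int main ( void ) {
       SVal line[LINE], dict[4];
       unsigned char ix2s[LINE/4];
       for (int i = 0; i < LINE; i++) line[i] = (i < 8) ? 0x1111 : 0x2222;
       printf("compressible: %d\n", compress_line(line, dict, ix2s));  /* 1 */
       return 0;
    }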
+ + RC obligations: the RCs presented to the user include exactly + the values in: + * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID + * F reps that are in use (.inUse == True) + + Hence the following actions at the following transitions are required: + + F rep: .inUse==True -> .inUse==False -- rcdec_LineF + F rep: .inUse==False -> .inUse==True -- rcinc_LineF + Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ + Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ +*/ +typedef + struct { + UInt magic; + LineZ linesZ[N_SECMAP_ZLINES]; + LineF* linesF; + UInt linesF_size; + } + SecMap; + +#define SecMap_MAGIC 0x571e58cbU + +static inline Bool is_sane_SecMap ( SecMap* sm ) { + return sm != NULL && sm->magic == SecMap_MAGIC; +} + +/* ------ Cache ------ */ + +#define N_WAY_BITS 16 +#define N_WAY_NENT (1 << N_WAY_BITS) + +/* Each tag is the address of the associated CacheLine, rounded down + to a CacheLine address boundary. A CacheLine size must be a power + of 2 and must be 8 or more. Hence an easy way to initialise the + cache so it is empty is to set all the tag values to any value % 8 + != 0, eg 1. This means all queries in the cache initially miss. + It does however require us to detect and not writeback, any line + with a bogus tag. */ +typedef + struct { + CacheLine lyns0[N_WAY_NENT]; + Addr tags0[N_WAY_NENT]; + } + Cache; + +static inline Bool is_valid_scache_tag ( Addr tag ) { + /* a valid tag should be naturally aligned to the start of + a CacheLine. */ + return 0 == (tag & (N_LINE_ARANGE - 1)); +} + + +/* --------- Primary data structures --------- */ + +/* Shadow memory primary map */ +static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */ +static Cache cache_shmem; + + +static UWord stats__secmaps_search = 0; // # SM finds +static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs +static UWord stats__secmaps_allocd = 0; // # SecMaps issued +static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered +static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued +static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage +static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued +static UWord stats__secmap_linesF_bytes = 0; // .. 
using this much storage +static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter +static UWord stats__cache_Z_fetches = 0; // # Z lines fetched +static UWord stats__cache_Z_wbacks = 0; // # Z lines written back +static UWord stats__cache_F_fetches = 0; // # F lines fetched +static UWord stats__cache_F_wbacks = 0; // # F lines written back +static UWord stats__cache_invals = 0; // # cache invals +static UWord stats__cache_flushes = 0; // # cache flushes +static UWord stats__cache_totrefs = 0; // # total accesses +static UWord stats__cache_totmisses = 0; // # misses +static ULong stats__cache_make_New_arange = 0; // total arange made New +static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps +static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise +static UWord stats__cline_read64s = 0; // # calls to s_m_read64 +static UWord stats__cline_read32s = 0; // # calls to s_m_read32 +static UWord stats__cline_read16s = 0; // # calls to s_m_read16 +static UWord stats__cline_read8s = 0; // # calls to s_m_read8 +static UWord stats__cline_write64s = 0; // # calls to s_m_write64 +static UWord stats__cline_write32s = 0; // # calls to s_m_write32 +static UWord stats__cline_write16s = 0; // # calls to s_m_write16 +static UWord stats__cline_write8s = 0; // # calls to s_m_write8 +static UWord stats__cline_set64s = 0; // # calls to s_m_set64 +static UWord stats__cline_set32s = 0; // # calls to s_m_set32 +static UWord stats__cline_set16s = 0; // # calls to s_m_set16 +static UWord stats__cline_set8s = 0; // # calls to s_m_set8 +static UWord stats__cline_get8s = 0; // # calls to s_m_get8 +static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8 +static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split +static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split +static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split +static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32 +static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16 +static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8 + +static inline Addr shmem__round_to_SecMap_base ( Addr a ) { + return a & ~(N_SECMAP_ARANGE - 1); +} +static inline UWord shmem__get_SecMap_offset ( Addr a ) { + return a & (N_SECMAP_ARANGE - 1); +} + + +/*----------------------------------------------------------------*/ +/*--- map_shmem :: WordFM Addr SecMap ---*/ +/*--- shadow memory (low level handlers) (shmem__* fns) ---*/ +/*----------------------------------------------------------------*/ + +/*--------------- SecMap allocation --------------- */ + +static HChar* shmem__bigchunk_next = NULL; +static HChar* shmem__bigchunk_end1 = NULL; + +static void* shmem__bigchunk_alloc ( SizeT n ) +{ + const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4; + tl_assert(n > 0); + n = VG_ROUNDUP(n, 16); + tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1); + tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next + <= (SSizeT)sHMEM__BIGCHUNK_SIZE); + if (shmem__bigchunk_next + n > shmem__bigchunk_end1) { + if (0) + VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n", + (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next)); + shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE ); + if (shmem__bigchunk_next == NULL) + VG_(out_of_memory_NORETURN)( + "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE ); + shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE; + } + tl_assert(shmem__bigchunk_next); + tl_assert( 0 
== (((Addr)shmem__bigchunk_next) & (16-1)) ); + tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1); + shmem__bigchunk_next += n; + return shmem__bigchunk_next - n; +} + +static SecMap* shmem__alloc_SecMap ( void ) +{ + Word i, j; + SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) ); + if (0) VG_(printf)("alloc_SecMap %p\n",sm); + tl_assert(sm); + sm->magic = SecMap_MAGIC; + for (i = 0; i < N_SECMAP_ZLINES; i++) { + sm->linesZ[i].dict[0] = SVal_NOACCESS; + sm->linesZ[i].dict[1] = SVal_INVALID; + sm->linesZ[i].dict[2] = SVal_INVALID; + sm->linesZ[i].dict[3] = SVal_INVALID; + for (j = 0; j < N_LINE_ARANGE/4; j++) + sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */ + } + sm->linesF = NULL; + sm->linesF_size = 0; + stats__secmaps_allocd++; + stats__secmap_ga_space_covered += N_SECMAP_ARANGE; + stats__secmap_linesZ_allocd += N_SECMAP_ZLINES; + stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ); + return sm; +} + +typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt; +static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} }; + +static SecMap* shmem__find_SecMap ( Addr ga ) +{ + SecMap* sm = NULL; + Addr gaKey = shmem__round_to_SecMap_base(ga); + // Cache + stats__secmaps_search++; + if (LIKELY(gaKey == smCache[0].gaKey)) + return smCache[0].sm; + if (LIKELY(gaKey == smCache[1].gaKey)) { + SMCacheEnt tmp = smCache[0]; + smCache[0] = smCache[1]; + smCache[1] = tmp; + return smCache[0].sm; + } + if (gaKey == smCache[2].gaKey) { + SMCacheEnt tmp = smCache[1]; + smCache[1] = smCache[2]; + smCache[2] = tmp; + return smCache[1].sm; + } + // end Cache + stats__secmaps_search_slow++; + if (VG_(lookupFM)( map_shmem, + NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) { + tl_assert(sm != NULL); + smCache[2] = smCache[1]; + smCache[1] = smCache[0]; + smCache[0].gaKey = gaKey; + smCache[0].sm = sm; + } else { + tl_assert(sm == NULL); + } + return sm; +} + +static SecMap* shmem__find_or_alloc_SecMap ( Addr ga ) +{ + SecMap* sm = shmem__find_SecMap ( ga ); + if (LIKELY(sm)) { + return sm; + } else { + /* create a new one */ + Addr gaKey = shmem__round_to_SecMap_base(ga); + sm = shmem__alloc_SecMap(); + tl_assert(sm); + VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm ); + return sm; + } +} + + +/* ------------ LineF and LineZ related ------------ */ + +static void rcinc_LineF ( LineF* lineF ) { + UWord i; + tl_assert(lineF->inUse); + for (i = 0; i < N_LINE_ARANGE; i++) + rcinc(lineF->w64s[i]); +} + +static void rcdec_LineF ( LineF* lineF ) { + UWord i; + tl_assert(lineF->inUse); + for (i = 0; i < N_LINE_ARANGE; i++) + rcdec(lineF->w64s[i]); +} + +static void rcinc_LineZ ( LineZ* lineZ ) { + tl_assert(lineZ->dict[0] != SVal_INVALID); + rcinc(lineZ->dict[0]); + if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]); + if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]); + if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]); +} + +static void rcdec_LineZ ( LineZ* lineZ ) { + tl_assert(lineZ->dict[0] != SVal_INVALID); + rcdec(lineZ->dict[0]); + if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]); + if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]); + if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]); +} + +inline +static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) { + Word bix, shft, mask, prep; + tl_assert(ix >= 0); + bix = ix >> 2; + shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */ + mask = 3 << shft; + prep = b2 << shft; + arr[bix] = (arr[bix] & ~mask) | prep; +} + +inline +static UWord read_twobit_array ( UChar* arr, UWord ix ) { + Word bix, 
shft; + tl_assert(ix >= 0); + bix = ix >> 2; + shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */ + return (arr[bix] >> shft) & 3; +} + +/* Given address 'tag', find either the Z or F line containing relevant + data, so it can be read into the cache. +*/ +static void find_ZF_for_reading ( /*OUT*/LineZ** zp, + /*OUT*/LineF** fp, Addr tag ) { + LineZ* lineZ; + LineF* lineF; + UWord zix; + SecMap* sm = shmem__find_or_alloc_SecMap(tag); + UWord smoff = shmem__get_SecMap_offset(tag); + /* since smoff is derived from a valid tag, it should be + cacheline-aligned. */ + tl_assert(0 == (smoff & (N_LINE_ARANGE - 1))); + zix = smoff >> N_LINE_BITS; + tl_assert(zix < N_SECMAP_ZLINES); + lineZ = &sm->linesZ[zix]; + lineF = NULL; + if (lineZ->dict[0] == SVal_INVALID) { + UInt fix = (UInt)lineZ->dict[1]; + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < sm->linesF_size); + lineF = &sm->linesF[fix]; + tl_assert(lineF->inUse); + lineZ = NULL; + } + *zp = lineZ; + *fp = lineF; +} + +/* Given address 'tag', return the relevant SecMap and the index of + the LineZ within it, in the expectation that the line is to be + overwritten. Regardless of whether 'tag' is currently associated + with a Z or F representation, to rcdec on the current + representation, in recognition of the fact that the contents are + just about to be overwritten. */ +static __attribute__((noinline)) +void find_Z_for_writing ( /*OUT*/SecMap** smp, + /*OUT*/Word* zixp, + Addr tag ) { + LineZ* lineZ; + LineF* lineF; + UWord zix; + SecMap* sm = shmem__find_or_alloc_SecMap(tag); + UWord smoff = shmem__get_SecMap_offset(tag); + /* since smoff is derived from a valid tag, it should be + cacheline-aligned. */ + tl_assert(0 == (smoff & (N_LINE_ARANGE - 1))); + zix = smoff >> N_LINE_BITS; + tl_assert(zix < N_SECMAP_ZLINES); + lineZ = &sm->linesZ[zix]; + lineF = NULL; + /* re RCs, we are freeing up this LineZ/LineF so that new data can + be parked in it. Hence have to rcdec it accordingly. */ + /* If lineZ has an associated lineF, free it up. */ + if (lineZ->dict[0] == SVal_INVALID) { + UInt fix = (UInt)lineZ->dict[1]; + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < sm->linesF_size); + lineF = &sm->linesF[fix]; + tl_assert(lineF->inUse); + rcdec_LineF(lineF); + lineF->inUse = False; + } else { + rcdec_LineZ(lineZ); + } + *smp = sm; + *zixp = zix; +} + +static __attribute__((noinline)) +void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) { + UInt i, new_size; + LineF* nyu; + + if (sm->linesF) { + tl_assert(sm->linesF_size > 0); + } else { + tl_assert(sm->linesF_size == 0); + } + + if (sm->linesF) { + for (i = 0; i < sm->linesF_size; i++) { + if (!sm->linesF[i].inUse) { + *fixp = (Word)i; + return; + } + } + } + + /* No free F line found. Expand existing array and try again. */ + new_size = sm->linesF_size==0 ? 
1 : 2 * sm->linesF_size; + nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)", + new_size * sizeof(LineF) ); + tl_assert(nyu); + + stats__secmap_linesF_allocd += (new_size - sm->linesF_size); + stats__secmap_linesF_bytes += (new_size - sm->linesF_size) + * sizeof(LineF); + + if (0) + VG_(printf)("SM %p: expand F array from %d to %d\n", + sm, (Int)sm->linesF_size, new_size); + + for (i = 0; i < new_size; i++) + nyu[i].inUse = False; + + if (sm->linesF) { + for (i = 0; i < sm->linesF_size; i++) { + tl_assert(sm->linesF[i].inUse); + nyu[i] = sm->linesF[i]; + } + VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) ); + HG_(free)(sm->linesF); + } + + sm->linesF = nyu; + sm->linesF_size = new_size; + + for (i = 0; i < sm->linesF_size; i++) { + if (!sm->linesF[i].inUse) { + *fixp = (Word)i; + return; + } + } + + /*NOTREACHED*/ + tl_assert(0); +} + + +/* ------------ CacheLine and implicit-tree related ------------ */ + +__attribute__((unused)) +static void pp_CacheLine ( CacheLine* cl ) { + Word i; + if (!cl) { + VG_(printf)("%s","pp_CacheLine(NULL)\n"); + return; + } + for (i = 0; i < N_LINE_TREES; i++) + VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]); + for (i = 0; i < N_LINE_ARANGE; i++) + VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]); +} + +static UChar descr_to_validbits ( UShort descr ) +{ + /* a.k.a Party Time for gcc's constant folder */ +# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \ + b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \ + ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \ + ( (b8_5) << 12) | ( (b8_4) << 11) | \ + ( (b8_3) << 10) | ( (b8_2) << 9) | \ + ( (b8_1) << 8) | ( (b8_0) << 7) | \ + ( (b16_3) << 6) | ( (b32_1) << 5) | \ + ( (b16_2) << 4) | ( (b64) << 3) | \ + ( (b16_1) << 2) | ( (b32_0) << 1) | \ + ( (b16_0) << 0) ) ) + +# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \ + ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \ + ( (bit5) << 5) | ( (bit4) << 4) | \ + ( (bit3) << 3) | ( (bit2) << 2) | \ + ( (bit1) << 1) | ( (bit0) << 0) ) ) + + /* these should all get folded out at compile time */ + tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7); + tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0); + tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0); + + switch (descr) { + /* + +--------------------------------- TREE_DESCR_8_7 + | +------------------- TREE_DESCR_8_0 + | | +---------------- TREE_DESCR_16_3 + | | | +-------------- TREE_DESCR_32_1 + | | | | +------------ TREE_DESCR_16_2 + | | | | | +--------- TREE_DESCR_64 + | | | | | | +------ TREE_DESCR_16_1 + | | | | | | | +---- TREE_DESCR_32_0 + | | | | | | | | +-- TREE_DESCR_16_0 + | | | | | | | | | + | | | | | | | | | GRANULARITY, 7 -> 0 */ + case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */ + return BYTE(1,1,1,1,1,1,1,1); + case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */ + return BYTE(1,1,0,1,1,1,1,1); + case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */ + return BYTE(0,1,1,1,1,1,1,1); + case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 
16 16 8 8 8 8 */ + return BYTE(0,1,0,1,1,1,1,1); + + case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */ + return BYTE(1,1,1,1,1,1,0,1); + case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */ + return BYTE(1,1,0,1,1,1,0,1); + case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */ + return BYTE(0,1,1,1,1,1,0,1); + case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */ + return BYTE(0,1,0,1,1,1,0,1); + + case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */ + return BYTE(1,1,1,1,0,1,1,1); + case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */ + return BYTE(1,1,0,1,0,1,1,1); + case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */ + return BYTE(0,1,1,1,0,1,1,1); + case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */ + return BYTE(0,1,0,1,0,1,1,1); + + case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */ + return BYTE(1,1,1,1,0,1,0,1); + case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */ + return BYTE(1,1,0,1,0,1,0,1); + case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */ + return BYTE(0,1,1,1,0,1,0,1); + case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */ + return BYTE(0,1,0,1,0,1,0,1); + + case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */ + return BYTE(0,0,0,1,1,1,1,1); + case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */ + return BYTE(0,0,0,1,1,1,0,1); + case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */ + return BYTE(0,0,0,1,0,1,1,1); + case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */ + return BYTE(0,0,0,1,0,1,0,1); + + case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */ + return BYTE(1,1,1,1,0,0,0,1); + case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */ + return BYTE(1,1,0,1,0,0,0,1); + case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */ + return BYTE(0,1,1,1,0,0,0,1); + case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */ + return BYTE(0,1,0,1,0,0,0,1); + + case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */ + return BYTE(0,0,0,1,0,0,0,1); + + case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */ + return BYTE(0,0,0,0,0,0,0,1); + + default: return BYTE(0,0,0,0,0,0,0,0); + /* INVALID - any valid descr produces at least one + valid bit in tree[0..7]*/ + } + /* NOTREACHED*/ + tl_assert(0); + +# undef DESCR +# undef BYTE +} + +__attribute__((unused)) +static Bool is_sane_Descr ( UShort descr ) { + return descr_to_validbits(descr) != 0; +} + +static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) { + VG_(sprintf)(dst, + "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d", + (Int)((descr & TREE_DESCR_8_7) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_6) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_5) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_4) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_3) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_2) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_1) ? 1 : 0), + (Int)((descr & TREE_DESCR_8_0) ? 1 : 0), + (Int)((descr & TREE_DESCR_16_3) ? 1 : 0), + (Int)((descr & TREE_DESCR_32_1) ? 1 : 0), + (Int)((descr & TREE_DESCR_16_2) ? 1 : 0), + (Int)((descr & TREE_DESCR_64) ? 1 : 0), + (Int)((descr & TREE_DESCR_16_1) ? 1 : 0), + (Int)((descr & TREE_DESCR_32_0) ? 1 : 0), + (Int)((descr & TREE_DESCR_16_0) ? 1 : 0) + ); +} +static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) { + VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d", + (Int)((byte & 128) ? 1 : 0), + (Int)((byte & 64) ? 1 : 0), + (Int)((byte & 32) ? 1 : 0), + (Int)((byte & 16) ? 1 : 0), + (Int)((byte & 8) ? 
1 : 0), + (Int)((byte & 4) ? 1 : 0), + (Int)((byte & 2) ? 1 : 0), + (Int)((byte & 1) ? 1 : 0) + ); +} + +static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) { + Word i; + UChar validbits = descr_to_validbits(descr); + HChar buf[128], buf2[128]; + if (validbits == 0) + goto bad; + for (i = 0; i < 8; i++) { + if (validbits & (1<<i)) { + if (tree[i] == SVal_INVALID) + goto bad; + } else { + if (tree[i] != SVal_INVALID) + goto bad; + } + } + return True; + bad: + sprintf_Descr( buf, descr ); + sprintf_Byte( buf2, validbits ); + VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n"); + VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2); + VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf); + for (i = 0; i < 8; i++) + VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]); + VG_(printf)("%s","}\n"); + return 0; +} + +static Bool is_sane_CacheLine ( CacheLine* cl ) +{ + Word tno, cloff; + + if (!cl) goto bad; + + for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { + UShort descr = cl->descrs[tno]; + SVal* tree = &cl->svals[cloff]; + if (!is_sane_Descr_and_Tree(descr, tree)) + goto bad; + } + tl_assert(cloff == N_LINE_ARANGE); + return True; + bad: + pp_CacheLine(cl); + return False; +} + +static UShort normalise_tree ( /*MOD*/SVal* tree ) +{ + UShort descr; + /* pre: incoming tree[0..7] does not have any invalid shvals, in + particular no zeroes. */ + if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID + || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID + || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID + || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID)) + tl_assert(0); + + descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5 + | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2 + | TREE_DESCR_8_1 | TREE_DESCR_8_0; + /* build 16-bit layer */ + if (tree[1] == tree[0]) { + tree[1] = SVal_INVALID; + descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0); + descr |= TREE_DESCR_16_0; + } + if (tree[3] == tree[2]) { + tree[3] = SVal_INVALID; + descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2); + descr |= TREE_DESCR_16_1; + } + if (tree[5] == tree[4]) { + tree[5] = SVal_INVALID; + descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4); + descr |= TREE_DESCR_16_2; + } + if (tree[7] == tree[6]) { + tree[7] = SVal_INVALID; + descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6); + descr |= TREE_DESCR_16_3; + } + /* build 32-bit layer */ + if (tree[2] == tree[0] + && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) { + tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */ + descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0); + descr |= TREE_DESCR_32_0; + } + if (tree[6] == tree[4] + && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) { + tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */ + descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2); + descr |= TREE_DESCR_32_1; + } + /* build 64-bit layer */ + if (tree[4] == tree[0] + && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) { + tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */ + descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0); + descr |= TREE_DESCR_64; + } + return descr; +} + +/* This takes a cacheline where all the data is at the leaves + (w8[..]) and builds a correctly normalised tree. 
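+   For example, if all eight leaves of a tree hold the same shadow
+   value, normalise_tree() collapses them into a single 64-bit entry
+   and the tree's descriptor becomes just TREE_DESCR_64.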
*/ +static void normalise_CacheLine ( /*MOD*/CacheLine* cl ) +{ + Word tno, cloff; + for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { + SVal* tree = &cl->svals[cloff]; + cl->descrs[tno] = normalise_tree( tree ); + } + tl_assert(cloff == N_LINE_ARANGE); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + stats__cline_normalises++; +} + + +typedef struct { UChar count; SVal sval; } CountedSVal; + +static +void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst, + /*OUT*/Word* dstUsedP, + Word nDst, CacheLine* src ) +{ + Word tno, cloff, dstUsed; + + tl_assert(nDst == N_LINE_ARANGE); + dstUsed = 0; + + for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) { + UShort descr = src->descrs[tno]; + SVal* tree = &src->svals[cloff]; + + /* sequentialise the tree described by (descr,tree). */ +# define PUT(_n,_v) \ + do { dst[dstUsed ].count = (_n); \ + dst[dstUsed++].sval = (_v); \ + } while (0) + + /* byte 0 */ + if (descr & TREE_DESCR_64) PUT(8, tree[0]); else + if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else + if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else + if (descr & TREE_DESCR_8_0) PUT(1, tree[0]); + /* byte 1 */ + if (descr & TREE_DESCR_8_1) PUT(1, tree[1]); + /* byte 2 */ + if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else + if (descr & TREE_DESCR_8_2) PUT(1, tree[2]); + /* byte 3 */ + if (descr & TREE_DESCR_8_3) PUT(1, tree[3]); + /* byte 4 */ + if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else + if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else + if (descr & TREE_DESCR_8_4) PUT(1, tree[4]); + /* byte 5 */ + if (descr & TREE_DESCR_8_5) PUT(1, tree[5]); + /* byte 6 */ + if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else + if (descr & TREE_DESCR_8_6) PUT(1, tree[6]); + /* byte 7 */ + if (descr & TREE_DESCR_8_7) PUT(1, tree[7]); + +# undef PUT + /* END sequentialise the tree described by (descr,tree). */ + + } + tl_assert(cloff == N_LINE_ARANGE); + tl_assert(dstUsed <= nDst); + + *dstUsedP = dstUsed; +} + +/* Write the cacheline 'wix' to backing store. Where it ends up + is determined by its tag field. */ +static __attribute__((noinline)) void cacheline_wback ( UWord wix ) +{ + Word i, j, k, m; + Addr tag; + SecMap* sm; + CacheLine* cl; + LineZ* lineZ; + LineF* lineF; + Word zix, fix, csvalsUsed; + CountedSVal csvals[N_LINE_ARANGE]; + SVal sv; + + if (0) + VG_(printf)("scache wback line %d\n", (Int)wix); + + tl_assert(wix >= 0 && wix < N_WAY_NENT); + + tag = cache_shmem.tags0[wix]; + cl = &cache_shmem.lyns0[wix]; + + /* The cache line may have been invalidated; if so, ignore it. */ + if (!is_valid_scache_tag(tag)) + return; + + /* Where are we going to put it? */ + sm = NULL; + lineZ = NULL; + lineF = NULL; + zix = fix = -1; + + /* find the Z line to write in and rcdec it or the associated F + line. 
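+      The loop below then tries to rebuild the compressed (Z) form by
+      packing the line's distinct values into the 4-entry dictionary,
+      falling back to a full (F) line if more than four distinct
+      values are encountered.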
*/ + find_Z_for_writing( &sm, &zix, tag ); + + tl_assert(sm); + tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES); + lineZ = &sm->linesZ[zix]; + + /* Generate the data to be stored */ + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + + csvalsUsed = -1; + sequentialise_CacheLine( csvals, &csvalsUsed, + N_LINE_ARANGE, cl ); + tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE); + if (0) VG_(printf)("%lu ", csvalsUsed); + + lineZ->dict[0] = lineZ->dict[1] + = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; + + /* i indexes actual shadow values, k is cursor in csvals */ + i = 0; + for (k = 0; k < csvalsUsed; k++) { + + sv = csvals[k].sval; + if (CHECK_ZSM) + tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); + /* do we already have it? */ + if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; } + if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; } + if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; } + if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; } + /* no. look for a free slot. */ + if (CHECK_ZSM) + tl_assert(sv != SVal_INVALID); + if (lineZ->dict[0] + == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; } + if (lineZ->dict[1] + == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; } + if (lineZ->dict[2] + == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; } + if (lineZ->dict[3] + == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; } + break; /* we'll have to use the f rep */ + dict_ok: + m = csvals[k].count; + if (m == 8) { + write_twobit_array( lineZ->ix2s, i+0, j ); + write_twobit_array( lineZ->ix2s, i+1, j ); + write_twobit_array( lineZ->ix2s, i+2, j ); + write_twobit_array( lineZ->ix2s, i+3, j ); + write_twobit_array( lineZ->ix2s, i+4, j ); + write_twobit_array( lineZ->ix2s, i+5, j ); + write_twobit_array( lineZ->ix2s, i+6, j ); + write_twobit_array( lineZ->ix2s, i+7, j ); + i += 8; + } + else if (m == 4) { + write_twobit_array( lineZ->ix2s, i+0, j ); + write_twobit_array( lineZ->ix2s, i+1, j ); + write_twobit_array( lineZ->ix2s, i+2, j ); + write_twobit_array( lineZ->ix2s, i+3, j ); + i += 4; + } + else if (m == 1) { + write_twobit_array( lineZ->ix2s, i+0, j ); + i += 1; + } + else if (m == 2) { + write_twobit_array( lineZ->ix2s, i+0, j ); + write_twobit_array( lineZ->ix2s, i+1, j ); + i += 2; + } + else { + tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */ + } + + } + + if (LIKELY(i == N_LINE_ARANGE)) { + /* Construction of the compressed representation was + successful. */ + rcinc_LineZ(lineZ); + stats__cache_Z_wbacks++; + } else { + /* Cannot use the compressed(z) representation. Use the full(f) + rep instead. */ + tl_assert(i >= 0 && i < N_LINE_ARANGE); + alloc_F_for_writing( sm, &fix ); + tl_assert(sm->linesF); + tl_assert(sm->linesF_size > 0); + tl_assert(fix >= 0 && fix < (Word)sm->linesF_size); + lineF = &sm->linesF[fix]; + tl_assert(!lineF->inUse); + lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; + lineZ->dict[1] = (SVal)fix; + lineF->inUse = True; + i = 0; + for (k = 0; k < csvalsUsed; k++) { + if (CHECK_ZSM) + tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8); + sv = csvals[k].sval; + if (CHECK_ZSM) + tl_assert(sv != SVal_INVALID); + for (m = csvals[k].count; m > 0; m--) { + lineF->w64s[i] = sv; + i++; + } + } + tl_assert(i == N_LINE_ARANGE); + rcinc_LineF(lineF); + stats__cache_F_wbacks++; + } +} + +/* Fetch the cacheline 'wix' from the backing store. 
The tag + associated with 'wix' is assumed to have already been filled in; + hence that is used to determine where in the backing store to read + from. */ +static __attribute__((noinline)) void cacheline_fetch ( UWord wix ) +{ + Word i; + Addr tag; + CacheLine* cl; + LineZ* lineZ; + LineF* lineF; + + if (0) + VG_(printf)("scache fetch line %d\n", (Int)wix); + + tl_assert(wix >= 0 && wix < N_WAY_NENT); + + tag = cache_shmem.tags0[wix]; + cl = &cache_shmem.lyns0[wix]; + + /* reject nonsense requests */ + tl_assert(is_valid_scache_tag(tag)); + + lineZ = NULL; + lineF = NULL; + find_ZF_for_reading( &lineZ, &lineF, tag ); + tl_assert( (lineZ && !lineF) || (!lineZ && lineF) ); + + /* expand the data into the bottom layer of the tree, then get + cacheline_normalise to build the descriptor array. */ + if (lineF) { + tl_assert(lineF->inUse); + for (i = 0; i < N_LINE_ARANGE; i++) { + cl->svals[i] = lineF->w64s[i]; + } + stats__cache_F_fetches++; + } else { + for (i = 0; i < N_LINE_ARANGE; i++) { + SVal sv; + UWord ix = read_twobit_array( lineZ->ix2s, i ); + /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */ + sv = lineZ->dict[ix]; + tl_assert(sv != SVal_INVALID); + cl->svals[i] = sv; + } + stats__cache_Z_fetches++; + } + normalise_CacheLine( cl ); +} + +static void shmem__invalidate_scache ( void ) { + Word wix; + if (0) VG_(printf)("%s","scache inval\n"); + tl_assert(!is_valid_scache_tag(1)); + for (wix = 0; wix < N_WAY_NENT; wix++) { + cache_shmem.tags0[wix] = 1/*INVALID*/; + } + stats__cache_invals++; +} + +static void shmem__flush_and_invalidate_scache ( void ) { + Word wix; + Addr tag; + if (0) VG_(printf)("%s","scache flush and invalidate\n"); + tl_assert(!is_valid_scache_tag(1)); + for (wix = 0; wix < N_WAY_NENT; wix++) { + tag = cache_shmem.tags0[wix]; + if (tag == 1/*INVALID*/) { + /* already invalid; nothing to do */ + } else { + tl_assert(is_valid_scache_tag(tag)); + cacheline_wback( wix ); + } + cache_shmem.tags0[wix] = 1/*INVALID*/; + } + stats__cache_flushes++; + stats__cache_invals++; +} + + +static inline Bool aligned16 ( Addr a ) { + return 0 == (a & 1); +} +static inline Bool aligned32 ( Addr a ) { + return 0 == (a & 3); +} +static inline Bool aligned64 ( Addr a ) { + return 0 == (a & 7); +} +static inline UWord get_cacheline_offset ( Addr a ) { + return (UWord)(a & (N_LINE_ARANGE - 1)); +} +static inline Addr cacheline_ROUNDUP ( Addr a ) { + return ROUNDUP(a, N_LINE_ARANGE); +} +static inline Addr cacheline_ROUNDDN ( Addr a ) { + return ROUNDDN(a, N_LINE_ARANGE); +} +static inline UWord get_treeno ( Addr a ) { + return get_cacheline_offset(a) >> 3; +} +static inline UWord get_tree_offset ( Addr a ) { + return a & 7; +} + +static __attribute__((noinline)) + CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */ +static inline CacheLine* get_cacheline ( Addr a ) +{ + /* tag is 'a' with the in-line offset masked out, + eg a[31]..a[4] 0000 */ + Addr tag = a & ~(N_LINE_ARANGE - 1); + UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); + stats__cache_totrefs++; + if (LIKELY(tag == cache_shmem.tags0[wix])) { + return &cache_shmem.lyns0[wix]; + } else { + return get_cacheline_MISS( a ); + } +} + +static __attribute__((noinline)) + CacheLine* get_cacheline_MISS ( Addr a ) +{ + /* tag is 'a' with the in-line offset masked out, + eg a[31]..a[4] 0000 */ + + CacheLine* cl; + Addr* tag_old_p; + Addr tag = a & ~(N_LINE_ARANGE - 1); + UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); + + tl_assert(tag != cache_shmem.tags0[wix]); + + /* Dump the old line into the backing store. 
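+      This is the writeback-miss path: the victim line, if it has a
+      valid tag, is written back via cacheline_wback(), and the line
+      containing 'a' is then fetched into the same slot.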
*/ + stats__cache_totmisses++; + + cl = &cache_shmem.lyns0[wix]; + tag_old_p = &cache_shmem.tags0[wix]; + + if (is_valid_scache_tag( *tag_old_p )) { + /* EXPENSIVE and REDUNDANT: callee does it */ + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + cacheline_wback( wix ); + } + /* and reload the new one */ + *tag_old_p = tag; + cacheline_fetch( wix ); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + return cl; +} + +static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { + stats__cline_64to32pulldown++; + switch (toff) { + case 0: case 4: + tl_assert(descr & TREE_DESCR_64); + tree[4] = tree[0]; + descr &= ~TREE_DESCR_64; + descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0); + break; + default: + tl_assert(0); + } + return descr; +} + +static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { + stats__cline_32to16pulldown++; + switch (toff) { + case 0: case 2: + if (!(descr & TREE_DESCR_32_0)) { + descr = pulldown_to_32(tree, 0, descr); + } + tl_assert(descr & TREE_DESCR_32_0); + tree[2] = tree[0]; + descr &= ~TREE_DESCR_32_0; + descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0); + break; + case 4: case 6: + if (!(descr & TREE_DESCR_32_1)) { + descr = pulldown_to_32(tree, 4, descr); + } + tl_assert(descr & TREE_DESCR_32_1); + tree[6] = tree[4]; + descr &= ~TREE_DESCR_32_1; + descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2); + break; + default: + tl_assert(0); + } + return descr; +} + +static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) { + stats__cline_16to8pulldown++; + switch (toff) { + case 0: case 1: + if (!(descr & TREE_DESCR_16_0)) { + descr = pulldown_to_16(tree, 0, descr); + } + tl_assert(descr & TREE_DESCR_16_0); + tree[1] = tree[0]; + descr &= ~TREE_DESCR_16_0; + descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0); + break; + case 2: case 3: + if (!(descr & TREE_DESCR_16_1)) { + descr = pulldown_to_16(tree, 2, descr); + } + tl_assert(descr & TREE_DESCR_16_1); + tree[3] = tree[2]; + descr &= ~TREE_DESCR_16_1; + descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2); + break; + case 4: case 5: + if (!(descr & TREE_DESCR_16_2)) { + descr = pulldown_to_16(tree, 4, descr); + } + tl_assert(descr & TREE_DESCR_16_2); + tree[5] = tree[4]; + descr &= ~TREE_DESCR_16_2; + descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4); + break; + case 6: case 7: + if (!(descr & TREE_DESCR_16_3)) { + descr = pulldown_to_16(tree, 6, descr); + } + tl_assert(descr & TREE_DESCR_16_3); + tree[7] = tree[6]; + descr &= ~TREE_DESCR_16_3; + descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6); + break; + default: + tl_assert(0); + } + return descr; +} + + +static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) { + UShort mask; + switch (toff) { + case 0: + mask = TREE_DESCR_8_1 | TREE_DESCR_8_0; + tl_assert( (descr & mask) == mask ); + descr &= ~mask; + descr |= TREE_DESCR_16_0; + break; + case 2: + mask = TREE_DESCR_8_3 | TREE_DESCR_8_2; + tl_assert( (descr & mask) == mask ); + descr &= ~mask; + descr |= TREE_DESCR_16_1; + break; + case 4: + mask = TREE_DESCR_8_5 | TREE_DESCR_8_4; + tl_assert( (descr & mask) == mask ); + descr &= ~mask; + descr |= TREE_DESCR_16_2; + break; + case 6: + mask = TREE_DESCR_8_7 | TREE_DESCR_8_6; + tl_assert( (descr & mask) == mask ); + descr &= ~mask; + descr |= TREE_DESCR_16_3; + break; + default: + tl_assert(0); + } + return descr; +} + +static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) { + UShort mask; + switch (toff) { + case 0: + if (!(descr & TREE_DESCR_16_0)) + descr = pullup_descr_to_16(descr, 0); + if 
(!(descr & TREE_DESCR_16_1)) + descr = pullup_descr_to_16(descr, 2); + mask = TREE_DESCR_16_1 | TREE_DESCR_16_0; + tl_assert( (descr & mask) == mask ); + descr &= ~mask; + descr |= TREE_DESCR_32_0; + break; + case 4: + if (!(descr & TREE_DESCR_16_2)) + descr = pullup_descr_to_16(descr, 4); + if (!(descr & TREE_DESCR_16_3)) + descr = pullup_descr_to_16(descr, 6); + mask = TREE_DESCR_16_3 | TREE_DESCR_16_2; + tl_assert( (descr & mask) == mask ); + descr &= ~mask; + descr |= TREE_DESCR_32_1; + break; + default: + tl_assert(0); + } + return descr; +} + +static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) { + switch (toff) { + case 0: case 4: + return 0 != (descr & TREE_DESCR_64); + default: + tl_assert(0); + } +} + +static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) { + switch (toff) { + case 0: + return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0)); + case 2: + return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2)); + case 4: + return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4)); + case 6: + return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6)); + default: + tl_assert(0); + } +} + +/* ------------ Cache management ------------ */ + +static void zsm_flush_cache ( void ) +{ + shmem__flush_and_invalidate_scache(); +} + + +static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) ) +{ + tl_assert( sizeof(UWord) == sizeof(Addr) ); + + rcinc = p_rcinc; + rcdec = p_rcdec; + + tl_assert(map_shmem == NULL); + map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)", + HG_(free), + NULL/*unboxed UWord cmp*/); + tl_assert(map_shmem != NULL); + shmem__invalidate_scache(); + + /* a SecMap must contain an integral number of CacheLines */ + tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE)); + /* also ... a CacheLine holds an integral number of trees */ + tl_assert(0 == (N_LINE_ARANGE % 8)); +} + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// SECTION END compressed shadow memory // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// SECTION BEGIN vts primitives // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + +#ifndef __HB_VTS_H +#define __HB_VTS_H + +/* VtsIDs can't exceed 30 bits, since they have to be packed into the + lowest 30 bits of an SVal. */ +typedef UInt VtsID; +#define VtsID_INVALID 0xFFFFFFFF + +/* A VTS contains .ts, its vector clock, and also .id, a field to hold + a backlink for the caller's convenience. Since we have no idea + what to set that to in the library, it always gets set to + VtsID_INVALID. */ +typedef + struct { + VtsID id; + XArray* ts; /* XArray* ScalarTS(abstract) */ + } + VTS; + + +/* Create a new, empty VTS. */ +VTS* VTS__new ( void ); + +/* Delete this VTS in its entirety. */ +void VTS__delete ( VTS* vts ); + +/* Create a new singleton VTS. */ +VTS* VTS__singleton ( Thr* thr, ULong tym ); + +/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is + not modified. */ +VTS* VTS__tick ( Thr* me, VTS* vts ); + +/* Return a new VTS constructed as the join (max) of the 2 args. + Neither arg is modified. 
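+   For example, writing Tn:k for "thread Tn at scalar timestamp k",
+   joining [T1:3, T2:5] with [T1:4, T3:2] gives [T1:4, T2:5, T3:2]:
+   each thread's entry is the maximum of its entries in the two
+   arguments, with a missing entry treated as zero.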
*/ +VTS* VTS__join ( VTS* a, VTS* b ); + +/* Compute the partial ordering relation of the two args. */ +typedef + enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN } + POrd; + +POrd VTS__cmp ( VTS* a, VTS* b ); + +/* Compute an arbitrary structural (total) ordering on the two args, + based on their VCs, so they can be looked up in a table, tree, etc. + Returns -1, 0 or 1. */ +Word VTS__cmp_structural ( VTS* a, VTS* b ); + +/* Debugging only. Display the given VTS in the buffer. */ +void VTS__show ( HChar* buf, Int nBuf, VTS* vts ); + +/* Debugging only. Return vts[index], so to speak. */ +ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ); + +#endif /* ! __HB_VTS_H */ + + +/*--------------- to do with Vector Timestamps ---------------*/ + +/* Scalar Timestamp */ +typedef + struct { + Thr* thr; + ULong tym; + } + ScalarTS; + + +static Bool is_sane_VTS ( VTS* vts ) +{ + UWord i, n; + ScalarTS *st1, *st2; + if (!vts) return False; + if (!vts->ts) return False; + n = VG_(sizeXA)( vts->ts ); + if (n >= 2) { + for (i = 0; i < n-1; i++) { + st1 = VG_(indexXA)( vts->ts, i ); + st2 = VG_(indexXA)( vts->ts, i+1 ); + if (st1->thr >= st2->thr) + return False; + if (st1->tym == 0 || st2->tym == 0) + return False; + } + } + return True; +} + + +/* Create a new, empty VTS. +*/ +VTS* VTS__new ( void ) +{ + VTS* vts; + vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) ); + tl_assert(vts); + vts->id = VtsID_INVALID; + vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2", + HG_(free), sizeof(ScalarTS) ); + tl_assert(vts->ts); + return vts; +} + + +/* Delete this VTS in its entirety. +*/ +void VTS__delete ( VTS* vts ) +{ + tl_assert(vts); + tl_assert(vts->ts); + VG_(deleteXA)( vts->ts ); + HG_(free)(vts); +} + + +/* Create a new singleton VTS. +*/ +VTS* VTS__singleton ( Thr* thr, ULong tym ) { + ScalarTS st; + VTS* vts; + tl_assert(thr); + tl_assert(tym >= 1); + vts = VTS__new(); + st.thr = thr; + st.tym = tym; + VG_(addToXA)( vts->ts, &st ); + return vts; +} + + +/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is + not modified. +*/ +VTS* VTS__tick ( Thr* me, VTS* vts ) +{ + ScalarTS* here = NULL; + ScalarTS tmp; + VTS* res; + Word i, n; + tl_assert(me); + tl_assert(is_sane_VTS(vts)); + //if (0) VG_(printf)("tick vts thrno %ld szin %d\n", + // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) ); + res = VTS__new(); + n = VG_(sizeXA)( vts->ts ); + + /* main loop doesn't handle zero-entry case correctly, so + special-case it. */ + if (n == 0) { + tmp.thr = me; + tmp.tym = 1; + VG_(addToXA)( res->ts, &tmp ); + tl_assert(is_sane_VTS(res)); + return res; + } + + for (i = 0; i < n; i++) { + here = VG_(indexXA)( vts->ts, i ); + if (me < here->thr) { + /* We just went past 'me', without seeing it. 
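+            Insert a fresh [me:1] entry here, followed by the current
+            entry, so the result stays sorted by Thr*.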
*/ + tmp.thr = me; + tmp.tym = 1; + VG_(addToXA)( res->ts, &tmp ); + tmp = *here; + VG_(addToXA)( res->ts, &tmp ); + i++; + break; + } + else if (me == here->thr) { + tmp = *here; + tmp.tym++; + VG_(addToXA)( res->ts, &tmp ); + i++; + break; + } + else /* me > here->thr */ { + tmp = *here; + VG_(addToXA)( res->ts, &tmp ); + } + } + tl_assert(i >= 0 && i <= n); + if (i == n && here && here->thr < me) { + tmp.thr = me; + tmp.tym = 1; + VG_(addToXA)( res->ts, &tmp ); + } else { + for (/*keepgoing*/; i < n; i++) { + here = VG_(indexXA)( vts->ts, i ); + tmp = *here; + VG_(addToXA)( res->ts, &tmp ); + } + } + tl_assert(is_sane_VTS(res)); + //if (0) VG_(printf)("tick vts thrno %ld szou %d\n", + // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) ); + return res; +} + + +/* Return a new VTS constructed as the join (max) of the 2 args. + Neither arg is modified. +*/ +VTS* VTS__join ( VTS* a, VTS* b ) +{ + Word ia, ib, useda, usedb; + ULong tyma, tymb, tymMax; + Thr* thr; + VTS* res; + + tl_assert(a && a->ts); + tl_assert(b && b->ts); + useda = VG_(sizeXA)( a->ts ); + usedb = VG_(sizeXA)( b->ts ); + + res = VTS__new(); + ia = ib = 0; + + while (1) { + + /* This logic is to enumerate triples (thr, tyma, tymb) drawn + from a and b in order, where thr is the next Thr* + occurring in either a or b, and tyma/b are the relevant + scalar timestamps, taking into account implicit zeroes. */ + tl_assert(ia >= 0 && ia <= useda); + tl_assert(ib >= 0 && ib <= usedb); + + if (ia == useda && ib == usedb) { + /* both empty - done */ + break; + + } else if (ia == useda && ib != usedb) { + /* a empty, use up b */ + ScalarTS* tmpb = VG_(indexXA)( b->ts, ib ); + thr = tmpb->thr; + tyma = 0; + tymb = tmpb->tym; + ib++; + + } else if (ia != useda && ib == usedb) { + /* b empty, use up a */ + ScalarTS* tmpa = VG_(indexXA)( a->ts, ia ); + thr = tmpa->thr; + tyma = tmpa->tym; + tymb = 0; + ia++; + + } else { + /* both not empty; extract lowest-Thr*'d triple */ + ScalarTS* tmpa = VG_(indexXA)( a->ts, ia ); + ScalarTS* tmpb = VG_(indexXA)( b->ts, ib ); + if (tmpa->thr < tmpb->thr) { + /* a has the lowest unconsidered Thr* */ + thr = tmpa->thr; + tyma = tmpa->tym; + tymb = 0; + ia++; + } else if (tmpa->thr > tmpb->thr) { + /* b has the lowest unconsidered Thr* */ + thr = tmpb->thr; + tyma = 0; + tymb = tmpb->tym; + ib++; + } else { + /* they both next mention the same Thr* */ + tl_assert(tmpa->thr == tmpb->thr); + thr = tmpa->thr; /* == tmpb->thr */ + tyma = tmpa->tym; + tymb = tmpb->tym; + ia++; + ib++; + } + } + + /* having laboriously determined (thr, tyma, tymb), do something + useful with it. */ + tymMax = tyma > tymb ? tyma : tymb; + if (tymMax > 0) { + ScalarTS st; + st.thr = thr; + st.tym = tymMax; + VG_(addToXA)( res->ts, &st ); + } + + } + + tl_assert(is_sane_VTS( res )); + + return res; +} + + +/* Compute the partial ordering relation of the two args. +*/ +POrd VTS__cmp ( VTS* a, VTS* b ) +{ + Word ia, ib, useda, usedb; + ULong tyma, tymb; + + Bool all_leq = True; + Bool all_geq = True; + + tl_assert(a && a->ts); + tl_assert(b && b->ts); + useda = VG_(sizeXA)( a->ts ); + usedb = VG_(sizeXA)( b->ts ); + + ia = ib = 0; + + while (1) { + + /* This logic is to enumerate doubles (tyma, tymb) drawn + from a and b in order, and tyma/b are the relevant + scalar timestamps, taking into account implicit zeroes. 
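+         For instance, a thread present in only one of the two vectors
+         contributes an implicit zero for the other: comparing the
+         vectors [T1:2] and [T2:3] yields the pairs (2,0) and (0,3),
+         so the result is POrd_UN.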
*/ + tl_assert(ia >= 0 && ia <= useda); + tl_assert(ib >= 0 && ib <= usedb); + + if (ia == useda && ib == usedb) { + /* both empty - done */ + break; + + } else if (ia == useda && ib != usedb) { + /* a empty, use up b */ + ScalarTS* tmpb = VG_(indexXA)( b->ts, ib ); + tyma = 0; + tymb = tmpb->tym; + ib++; + + } else if (ia != useda && ib == usedb) { + /* b empty, use up a */ + ScalarTS* tmpa = VG_(indexXA)( a->ts, ia ); + tyma = tmpa->tym; + tymb = 0; + ia++; + + } else { + /* both not empty; extract lowest-Thr*'d triple */ + ScalarTS* tmpa = VG_(indexXA)( a->ts, ia ); + ScalarTS* tmpb = VG_(indexXA)( b->ts, ib ); + if (tmpa->thr < tmpb->thr) { + /* a has the lowest unconsidered Thr* */ + tyma = tmpa->tym; + tymb = 0; + ia++; + } + else + if (tmpa->thr > tmpb->thr) { + /* b has the lowest unconsidered Thr* */ + tyma = 0; + tymb = tmpb->tym; + ib++; + } else { + /* they both next mention the same Thr* */ + tl_assert(tmpa->thr == tmpb->thr); + tyma = tmpa->tym; + tymb = tmpb->tym; + ia++; + ib++; + } + } + + /* having laboriously determined (tyma, tymb), do something + useful with it. */ + if (tyma < tymb) + all_geq = False; + if (tyma > tymb) + all_leq = False; + } + + if (all_leq && all_geq) + return POrd_EQ; + /* now we know they aren't equal, so either all_leq or all_geq or + both are false. */ + if (all_leq) + return POrd_LT; + if (all_geq) + return POrd_GT; + /* hmm, neither all_geq or all_leq. This means unordered. */ + return POrd_UN; +} + + +/* Compute an arbitrary structural (total) ordering on the two args, + based on their VCs, so they can be looked up in a table, tree, etc. + Returns -1, 0 or 1. (really just 'deriving Ord' :-) +*/ +Word VTS__cmp_structural ( VTS* a, VTS* b ) +{ + /* We just need to generate an arbitrary total ordering based on + a->ts and b->ts. Preferably do it in a way which comes across likely + differences relatively quickly. */ + Word i, useda, usedb; + ScalarTS *tmpa, *tmpb; + + tl_assert(a && a->ts); + tl_assert(b && b->ts); + useda = VG_(sizeXA)( a->ts ); + usedb = VG_(sizeXA)( b->ts ); + + if (useda < usedb) return -1; + if (useda > usedb) return 1; + + /* Same length vectors, so let's step through them together. */ + tl_assert(useda == usedb); + for (i = 0; i < useda; i++) { + tmpa = VG_(indexXA)( a->ts, i ); + tmpb = VG_(indexXA)( b->ts, i ); + if (tmpa->tym < tmpb->tym) return -1; + if (tmpa->tym > tmpb->tym) return 1; + if (tmpa->thr < tmpb->thr) return -1; + if (tmpa->thr > tmpb->thr) return 1; + } + + /* They're identical. */ + return 0; +} + + +/* Debugging only. Display the given VTS in the buffer. +*/ +void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) { + ScalarTS* st; + HChar unit[64]; + Word i, n; + Int avail = nBuf; + tl_assert(vts && vts->ts); + tl_assert(nBuf > 16); + buf[0] = '['; + buf[1] = 0; + n = VG_(sizeXA)( vts->ts ); + for (i = 0; i < n; i++) { + tl_assert(avail >= 40); + st = VG_(indexXA)( vts->ts, i ); + VG_(memset)(unit, 0, sizeof(unit)); + VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld", + st->thr, st->tym); + if (avail < VG_(strlen)(unit) + 40/*let's say*/) { + VG_(strcat)(buf, " ...]"); + buf[nBuf-1] = 0; + return; + } + VG_(strcat)(buf, unit); + avail -= VG_(strlen)(unit); + } + VG_(strcat)(buf, "]"); + buf[nBuf-1] = 0; +} + + +/* Debugging only. Return vts[index], so to speak. 
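+   Returns 0 if 'idx' has no entry in the vector.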
+*/ +ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) { + UWord i, n; + tl_assert(vts && vts->ts); + n = VG_(sizeXA)( vts->ts ); + for (i = 0; i < n; i++) { + ScalarTS* st = VG_(indexXA)( vts->ts, i ); + if (st->thr == idx) + return st->tym; + } + return 0; +} + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// SECTION END vts primitives // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +// // +// SECTION BEGIN main library // +// // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////// +// // +// VTS set // +// // +///////////////////////////////////////////////////////// + +static WordFM* /* VTS* void void */ vts_set = NULL; + +static void vts_set_init ( void ) +{ + tl_assert(!vts_set); + vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1", + HG_(free), + (Word(*)(UWord,UWord))VTS__cmp_structural ); + tl_assert(vts_set); +} + +/* Given a newly made VTS, look in vts_set to see if we already have + an identical one. If yes, free up this one and return instead a + pointer to the existing one. If no, add this one to the set and + return the same pointer. Caller differentiates the two cases by + comparing returned pointer with the supplied one (although that + does require that the supplied VTS is not already in the set). +*/ +static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand ) +{ + UWord keyW, valW; + /* lookup cand (by value) */ + if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) { + /* found it */ + tl_assert(valW == 0); + /* if this fails, cand (by ref) was already present (!) */ + tl_assert(keyW != (UWord)cand); + VTS__delete(cand); + return (VTS*)keyW; + } else { + /* not present. Add and return pointer to same. */ + VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ ); + return cand; + } +} + + +///////////////////////////////////////////////////////// +// // +// VTS table // +// // +///////////////////////////////////////////////////////// + +static void VtsID__invalidate_caches ( void ); /* fwds */ + +/* A type to hold VTS table entries. Invariants: + If .vts == NULL, then this entry is not in use, so: + - .rc == 0 + - this entry is on the freelist (unfortunately, does not imply + any constraints on value for .nextfree) + If .vts != NULL, then this entry is in use: + - .vts is findable in vts_set + - .vts->id == this entry number + - no specific value for .rc (even 0 is OK) + - this entry is not on freelist, so .nextfree == VtsID_INVALID +*/ +typedef + struct { + VTS* vts; /* vts, in vts_set */ + UWord rc; /* reference count - enough for entire aspace */ + VtsID freelink; /* chain for free entries, VtsID_INVALID at end */ + } + VtsTE; + +/* The VTS table. */ +static XArray* /* of VtsTE */ vts_tab = NULL; + +/* An index into the VTS table, indicating the start of the list of + free (available for use) entries. If the list is empty, this is + VtsID_INVALID. */ +static VtsID vts_tab_freelist = VtsID_INVALID; + +/* Do a GC of vts_tab when the freelist becomes empty AND the size of + vts_tab equals or exceeds this size. 
After GC, the value here is + set appropriately so as to check for the next GC point. */ +static Word vts_next_GC_at = 1000; + +static void vts_tab_init ( void ) +{ + vts_tab + = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1", + HG_(free), sizeof(VtsTE) ); + vts_tab_freelist + = VtsID_INVALID; + tl_assert(vts_tab); +} + +/* Add ii to the free list, checking that it looks out-of-use. */ +static void add_to_free_list ( VtsID ii ) +{ + VtsTE* ie = VG_(indexXA)( vts_tab, ii ); + tl_assert(ie->vts == NULL); + tl_assert(ie->rc == 0); + tl_assert(ie->freelink == VtsID_INVALID); + ie->freelink = vts_tab_freelist; + vts_tab_freelist = ii; +} + +/* Get an entry from the free list. This will return VtsID_INVALID if + the free list is empty. */ +static VtsID get_from_free_list ( void ) +{ + VtsID ii; + VtsTE* ie; + if (vts_tab_freelist == VtsID_INVALID) + return VtsID_INVALID; + ii = vts_tab_freelist; + ie = VG_(indexXA)( vts_tab, ii ); + tl_assert(ie->vts == NULL); + tl_assert(ie->rc == 0); + vts_tab_freelist = ie->freelink; + return ii; +} + +/* Produce a new VtsID that can be used, either by getting it from + the freelist, or, if that is empty, by expanding vts_tab. */ +static VtsID get_new_VtsID ( void ) +{ + VtsID ii; + VtsTE te; + ii = get_from_free_list(); + if (ii != VtsID_INVALID) + return ii; + te.vts = NULL; + te.rc = 0; + te.freelink = VtsID_INVALID; + ii = (VtsID)VG_(addToXA)( vts_tab, &te ); + return ii; +} + + +/* Indirect callback from lib_zsm. */ +static void VtsID__rcinc ( VtsID ii ) +{ + VtsTE* ie; + /* VG_(indexXA) does a range check for us */ + ie = VG_(indexXA)( vts_tab, ii ); + tl_assert(ie->vts); /* else it's not in use */ + tl_assert(ie->rc < ~0UL); /* else we can't continue */ + tl_assert(ie->vts->id == ii); + ie->rc++; +} + +/* Indirect callback from lib_zsm. */ +static void VtsID__rcdec ( VtsID ii ) +{ + VtsTE* ie; + /* VG_(indexXA) does a range check for us */ + ie = VG_(indexXA)( vts_tab, ii ); + tl_assert(ie->vts); /* else it's not in use */ + tl_assert(ie->rc > 0); /* else RC snafu */ + tl_assert(ie->vts->id == ii); + ie->rc--; +} + + +/* Look up 'cand' in our collection of VTSs. If present, deallocate + it and return the VtsID for the pre-existing version. If not + present, add it to both vts_tab and vts_set, allocate a fresh VtsID + for it, and return that. */ +static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand ) +{ + VTS* auld; + tl_assert(cand->id == VtsID_INVALID); + auld = vts_set__find_and_dealloc__or_add(cand); + if (auld != cand) { + /* We already have an Aulde one. Use that. 
*/
+      VtsTE* ie;
+      tl_assert(auld->id != VtsID_INVALID);
+      ie = VG_(indexXA)( vts_tab, auld->id );
+      tl_assert(ie->vts == auld);
+      return auld->id;
+   } else {
+      VtsID  ii = get_new_VtsID();
+      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
+      ie->vts = cand;
+      ie->rc = 0;
+      ie->freelink = VtsID_INVALID;
+      cand->id = ii;
+      return ii;
+   }
+}
+
+
+static void show_vts_stats ( HChar* caller )
+{
+   UWord nSet, nTab, nLive;
+   ULong totrc;
+   UWord n, i;
+   nSet = VG_(sizeFM)( vts_set );
+   nTab = VG_(sizeXA)( vts_tab );
+   totrc = 0;
+   nLive = 0;
+   n = VG_(sizeXA)( vts_tab );
+   for (i = 0; i < n; i++) {
+      VtsTE* ie = VG_(indexXA)( vts_tab, i );
+      if (ie->vts) {
+         nLive++;
+         totrc += (ULong)ie->rc;
+      } else {
+         tl_assert(ie->rc == 0);
+      }
+   }
+   VG_(printf)(" show_vts_stats %s\n", caller);
+   VG_(printf)(" vts_tab size %4lu\n", nTab);
+   VG_(printf)(" vts_tab live %4lu\n", nLive);
+   VG_(printf)(" vts_set size %4lu\n", nSet);
+   VG_(printf)(" total rc %4llu\n", totrc);
+}
+
+/* NOT TO BE CALLED FROM WITHIN libzsm. */
+__attribute__((noinline))
+static void vts_tab__do_GC ( Bool show_stats )
+{
+   UWord i, nTab, nLive, nFreed;
+
+   /* check this is actually necessary. */
+   tl_assert(vts_tab_freelist == VtsID_INVALID);
+
+   /* empty the caches for partial order checks and binary joins. We
+      could do better and prune out the entries to be deleted, but it
+      ain't worth the hassle. */
+   VtsID__invalidate_caches();
+
+   /* First, make the reference counts up to date. */
+   zsm_flush_cache();
+
+   nTab = VG_(sizeXA)( vts_tab );
+
+   if (show_stats) {
+      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
+      show_vts_stats("before GC");
+   }
+
+   /* Now we can inspect the entire vts_tab. Any entries
+      with zero .rc fields are now no longer in use and can be
+      put back on the free list, removed from vts_set, and deleted. */
+   nFreed = 0;
+   for (i = 0; i < nTab; i++) {
+      Bool present;
+      UWord oldK = 0, oldV = 0;
+      VtsTE* te = VG_(indexXA)( vts_tab, i );
+      if (te->vts == NULL) {
+         tl_assert(te->rc == 0);
+         continue; /* already on the free list (presumably) */
+      }
+      if (te->rc > 0)
+         continue; /* in use */
+      /* Ok, we got one we can free. */
+      tl_assert(te->vts->id == i);
+      /* first, remove it from vts_set. */
+      present = VG_(delFromFM)( vts_set,
+                                &oldK, &oldV, (UWord)te->vts );
+      tl_assert(present); /* else it isn't in vts_set ?! */
+      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
+      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
+      /* now free the VTS itself */
+      VTS__delete(te->vts);
+      te->vts = NULL;
+      /* and finally put this entry on the free list */
+      tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
+      add_to_free_list( i );
+      nFreed++;
+   }
+
+   /* Now figure out when the next GC should be. We'll allow the
+      number of VTSs to double before GCing again. Except of course
+      that since we can't (or, at least, don't) shrink vts_tab, we
+      can't set the threshold value smaller than it.
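+      For example, if 1000 entries remain live after this GC, the next
+      collection is deferred until vts_tab has at least 2000 entries
+      (or the current table size, whichever is larger).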
*/ + tl_assert(nFreed <= nTab); + nLive = nTab - nFreed; + tl_assert(nLive >= 0 && nLive <= nTab); + vts_next_GC_at = 2 * nLive; + if (vts_next_GC_at < nTab) + vts_next_GC_at = nTab; + + if (show_stats) { + show_vts_stats("after GC"); + VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at); + } + + if (VG_(clo_verbosity) > 1) { + static UInt ctr = 0; + tl_assert(nTab > 0); + VG_(message)(Vg_DebugMsg, + "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)", + ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab); + } +} + + +///////////////////////////////////////////////////////// +// // +// Vts IDs // +// // +///////////////////////////////////////////////////////// + +////////////////////////// +static ULong stats__getOrdering_queries = 0; +static ULong stats__getOrdering_misses = 0; +static ULong stats__join2_queries = 0; +static ULong stats__join2_misses = 0; + +static inline UInt ROL32 ( UInt w, Int n ) { + w = (w << n) | (w >> (32-n)); + return w; +} +static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) { + UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13); + return hash % nTab; +} + +#define N_GETORDERING_CACHE 1023 +static + struct { VtsID vi1; VtsID vi2; POrd ord; } + getOrdering_cache[N_GETORDERING_CACHE]; + +#define N_JOIN2_CACHE 1023 +static + struct { VtsID vi1; VtsID vi2; VtsID res; } + join2_cache[N_JOIN2_CACHE]; + +static void VtsID__invalidate_caches ( void ) { + Int i; + for (i = 0; i < N_GETORDERING_CACHE; i++) { + getOrdering_cache[i].vi1 = VtsID_INVALID; + getOrdering_cache[i].vi2 = VtsID_INVALID; + getOrdering_cache[i].ord = 0; /* an invalid POrd value */ + } + for (i = 0; i < N_JOIN2_CACHE; i++) { + join2_cache[i].vi1 = VtsID_INVALID; + join2_cache[i].vi2 = VtsID_INVALID; + join2_cache[i].res = VtsID_INVALID; + } +} +////////////////////////// + +//static Bool VtsID__is_valid ( VtsID vi ) { +// VtsTE* ve; +// if (vi >= (VtsID)VG_(sizeXA)( vts_tab )) +// return False; +// ve = VG_(indexXA)( vts_tab, vi ); +// if (!ve->vts) +// return False; +// tl_assert(ve->vts->id == vi); +// return True; +//} + +static VTS* VtsID__to_VTS ( VtsID vi ) { + VtsTE* te = VG_(indexXA)( vts_tab, vi ); + tl_assert(te->vts); + return te->vts; +} + +static void VtsID__pp ( VtsID vi ) { + HChar buf[100]; + VTS* vts = VtsID__to_VTS(vi); + VTS__show( buf, sizeof(buf)-1, vts ); + buf[sizeof(buf)-1] = 0; + VG_(printf)("%s", buf); +} + +/* compute partial ordering relation of vi1 and vi2. */ +__attribute__((noinline)) +static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) { + UInt hash; + POrd ord; + VTS *v1, *v2; + //if (vi1 == vi2) return POrd_EQ; + tl_assert(vi1 != vi2); + ////++ + stats__getOrdering_queries++; + hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE); + if (getOrdering_cache[hash].vi1 == vi1 + && getOrdering_cache[hash].vi2 == vi2) + return getOrdering_cache[hash].ord; + stats__getOrdering_misses++; + ////-- + v1 = VtsID__to_VTS(vi1); + v2 = VtsID__to_VTS(vi2); + ord = VTS__cmp( v1, v2 ); + ////++ + getOrdering_cache[hash].vi1 = vi1; + getOrdering_cache[hash].vi2 = vi2; + getOrdering_cache[hash].ord = ord; + ////-- + return ord; +} +static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) { + return vi1 == vi2 ? 
POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2); +} + +/* compute binary join */ +__attribute__((noinline)) +static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) { + UInt hash; + VtsID res; + VTS *vts1, *vts2, *nyu; + //if (vi1 == vi2) return vi1; + tl_assert(vi1 != vi2); + ////++ + stats__join2_queries++; + hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE); + if (join2_cache[hash].vi1 == vi1 + && join2_cache[hash].vi2 == vi2) + return join2_cache[hash].res; + stats__join2_misses++; + ////-- + vts1 = VtsID__to_VTS(vi1); + vts2 = VtsID__to_VTS(vi2); + nyu = VTS__join(vts1,vts2); + res = vts_tab__find_and_dealloc__or_add(nyu); + ////++ + join2_cache[hash].vi1 = vi1; + join2_cache[hash].vi2 = vi2; + join2_cache[hash].res = res; + ////-- + return res; +} +static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) { + return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2); +} + +/* create a singleton VTS, namely [thr:1] */ +static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) { + VTS* nyu = VTS__singleton(thr,tym); + return vts_tab__find_and_dealloc__or_add(nyu); +} + +/* tick operation, creates value 1 if specified index is absent */ +static VtsID VtsID__tick ( VtsID vi, Thr* idx ) { + VTS* vts = VtsID__to_VTS(vi); + VTS* nyu = VTS__tick(idx,vts); + return vts_tab__find_and_dealloc__or_add(nyu); +} + +/* index into a VTS (only for assertions) */ +static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) { + VTS* vts = VtsID__to_VTS(vi); + return VTS__indexAt_SLOW( vts, idx ); +} + + +///////////////////////////////////////////////////////// +// // +// Threads // +// // +///////////////////////////////////////////////////////// + +struct _Thr { + /* Current VTSs for this thread. They change as we go along. viR + is the VTS to be used for reads, viW for writes. Usually they + are the same, but can differ when we deal with reader-writer + locks. It is always the case that VtsID__getOrdering(viW,viR) + == POrd_LT or POrdEQ -- that is, viW must be the same, or + lagging behind, viR. */ + VtsID viR; + VtsID viW; + /* opaque (to us) data we hold on behalf of the library's user. */ + void* opaque; +}; + +static Thr* Thr__new ( void ) { + Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) ); + thr->viR = VtsID_INVALID; + thr->viW = VtsID_INVALID; + return thr; +} + + +///////////////////////////////////////////////////////// +// // +// Shadow Values // +// // +///////////////////////////////////////////////////////// + +// type SVal, SVal_INVALID and SVal_NOACCESS are defined by +// hb_zsm.h. We have to do everything else here. + +/* SVal is 64 bit unsigned int. 
+ + <---------30---------> <---------30---------> + 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin) + 01 X--------------------X XX X--------------------X E(rror) + 10 X--------------------X XX X--------------------X A: SVal_NOACCESS + 11 X--------------------X XX X--------------------X I: SVal_INVALID +*/ +#define SVAL_TAGMASK (3ULL << 62) + +static inline Bool SVal__isC ( SVal s ) { + return (0ULL << 62) == (s & SVAL_TAGMASK); +} +static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) { + //tl_assert(VtsID__is_valid(rmini)); + //tl_assert(VtsID__is_valid(wmini)); + return (((ULong)rmini) << 32) | ((ULong)wmini); +} +static inline VtsID SVal__unC_Rmin ( SVal s ) { + tl_assert(SVal__isC(s)); + return (VtsID)(s >> 32); +} +static inline VtsID SVal__unC_Wmin ( SVal s ) { + tl_assert(SVal__isC(s)); + return (VtsID)(s & 0xFFFFFFFFULL); +} + +static Bool SVal__isE ( SVal s ) { + return (1ULL << 62) == (s & SVAL_TAGMASK); +} +static SVal SVal__mkE ( void ) { + return 1ULL << 62; +} + +static Bool SVal__isA ( SVal s ) { + return (2ULL << 62) == (s & SVAL_TAGMASK); +} +static SVal SVal__mkA ( void ) { + return 2ULL << 62; +} + +/* Direct callback from lib_zsm. */ +static void SVal__rcinc ( SVal s ) { + if (SVal__isC(s)) { + VtsID__rcinc( SVal__unC_Rmin(s) ); + VtsID__rcinc( SVal__unC_Wmin(s) ); + } +} + +/* Direct callback from lib_zsm. */ +static void SVal__rcdec ( SVal s ) { + if (SVal__isC(s)) { + VtsID__rcdec( SVal__unC_Rmin(s) ); + VtsID__rcdec( SVal__unC_Wmin(s) ); + } +} + + +///////////////////////////////////////////////////////// +// // +// A simple group (memory) allocator // +// // +///////////////////////////////////////////////////////// + +//////////////// BEGIN general group allocator +typedef + struct { + UWord elemSzB; /* element size */ + UWord nPerGroup; /* # elems per group */ + void* (*alloc)(HChar*, SizeT); /* group allocator */ + HChar* cc; /* group allocator's cc */ + void (*free)(void*); /* group allocator's free-er (unused) */ + /* XArray of void* (pointers to groups). The groups themselves. + Each element is a pointer to a block of size (elemSzB * + nPerGroup) bytes. */ + XArray* groups; + /* next free element. Is a pointer to an element in one of the + groups pointed to by .groups. */ + void* nextFree; + } + GroupAlloc; + +static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga, + UWord elemSzB, + UWord nPerGroup, + void* (*alloc)(HChar*, SizeT), + HChar* cc, + void (*free)(void*) ) +{ + tl_assert(0 == (elemSzB % sizeof(UWord))); + tl_assert(elemSzB >= sizeof(UWord)); + tl_assert(nPerGroup >= 100); /* let's say */ + tl_assert(alloc); + tl_assert(cc); + tl_assert(free); + tl_assert(ga); + VG_(memset)(ga, 0, sizeof(*ga)); + ga->elemSzB = elemSzB; + ga->nPerGroup = nPerGroup; + ga->groups = NULL; + ga->alloc = alloc; + ga->cc = cc; + ga->free = free; + ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) ); + ga->nextFree = NULL; + tl_assert(ga->groups); +} + +/* The freelist is empty. Allocate a new group and put all the new + elements in it onto the freelist. */ +__attribute__((noinline)) +static void gal_add_new_group ( GroupAlloc* ga ) +{ + Word i; + UWord* group; + tl_assert(ga); + tl_assert(ga->nextFree == NULL); + group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup ); + tl_assert(group); + /* extend the freelist through the new group. Place the freelist + pointer in the first word of each element. That's why the + element size must be at least one word. 
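   For illustration, the same intrusive free-list scheme in a
   self-contained sketch (hypothetical names, plain malloc in place of
   the group allocator's alloc callback):

      #include <stdlib.h>
      #include <stdio.h>

      // Each free element stores the address of the next free element in
      // its own first word, so no separate bookkeeping is needed.  This
      // assumes elements are at least one word long and word-aligned, as
      // asserted by init_GroupAlloc above.
      #define ELEM_SZ   (4 * sizeof(void*))   // example element size
      #define PER_GROUP 8                     // example group size

      static void* free_head = NULL;

      static void add_group ( void )
      {
         char* group = malloc(ELEM_SZ * PER_GROUP);
         if (!group) abort();
         for (long i = PER_GROUP - 1; i >= 0; i--) {
            void** elem = (void**)(group + i * ELEM_SZ);
            *elem = free_head;                // first word -> next free elem
            free_head = elem;
         }
      }

      static void* ga_alloc ( void )
      {
         if (!free_head) add_group();
         void** elem = free_head;
         free_head = *elem;                   // unlink from the free list
         return elem;
      }

      static void ga_free ( void* p )
      {
         *(void**)p = free_head;              // push back on the free list
         free_head = p;
      }

      int main ( void )
      {
         void* a = ga_alloc();
         void* b = ga_alloc();
         ga_free(a);
         printf("%p %p\n", a, b);
         return 0;
      }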
*/ + for (i = ga->nPerGroup-1; i >= 0; i--) { + UChar* elemC = ((UChar*)group) + i * ga->elemSzB; + UWord* elem = (UWord*)elemC; + tl_assert(0 == (((UWord)elem) % sizeof(UWord))); + *elem = (UWord)ga->nextFree; + ga->nextFree = elem; + } + /* and add to our collection of groups */ + VG_(addToXA)( ga->groups, &group ); +} + +inline static void* gal_Alloc ( GroupAlloc* ga ) +{ + UWord* elem; + if (UNLIKELY(ga->nextFree == NULL)) { + gal_add_new_group(ga); + } + elem = ga->nextFree; + ga->nextFree = (void*)*elem; + *elem = 0; /* unnecessary, but just to be on the safe side */ + return elem; +} + +inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n ) +{ + tl_assert(n == ga->elemSzB); + return gal_Alloc( ga ); +} + +inline static void gal_Free ( GroupAlloc* ga, void* p ) +{ + UWord* elem = (UWord*)p; + *elem = (UWord)ga->nextFree; + ga->nextFree = elem; +} +//////////////// END general group allocator + + +///////////////////////////////////////////////////////// +// // +// Change-event map2 // +// // +///////////////////////////////////////////////////////// + +#define EVENT_MAP_GC_DISCARD_FRACTION 0.5 + +/* This is in two parts: + + 1. An OSet of RCECs. This is a set of reference-counted stack + traces. When the reference count of a stack trace becomes zero, + it is removed from the set and freed up. The intent is to have + a set of stack traces which can be referred to from (2), but to + only represent each one once. The set is indexed/searched by + ordering on the stack trace vectors. + + 2. A SparseWA of OldRefs. These store information about each old + ref that we need to record. It is indexed by address of the + location for which the information is recorded. For LRU + purposes, each OldRef also contains a generation number, + indicating when it was most recently accessed. + + The important part of an OldRef is, however, its accs[] array. + This is an array of N_OLDREF_ACCS which binds (thread, R/W, + size) triples to RCECs. This allows us to collect the last + access-traceback by up to N_OLDREF_ACCS different triples for + this location. The accs[] array is a MTF-array. If a binding + falls off the end, that's too bad -- we will lose info about + that triple's access to this location. + + When the SparseWA becomes too big, we can throw away the OldRefs + whose generation numbers are below some threshold; hence doing + approximate LRU discarding. For each discarded OldRef we must + of course decrement the reference count on the all RCECs it + refers to, in order that entries from (1) eventually get + discarded too. + + A major improvement in reliability of this mechanism would be to + have a dynamically sized OldRef.accs[] array, so no entries ever + fall off the end. In investigations (Dec 08) it appears that a + major cause for the non-availability of conflicting-access traces + in race reports is caused by the fixed size of this array. I + suspect for most OldRefs, only a few entries are used, but for a + minority of cases there is an overflow, leading to info lossage. + Investigations also suggest this is very workload and scheduling + sensitive. Therefore a dynamic sizing would be better. + + However, dynamic sizing would defeat the use of a GroupAllocator + for OldRef structures. And that's important for performance. So + it's not straightforward to do. 
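   To make the MTF-array idea concrete, here is a self-contained sketch
   of a fixed-size move-to-front binding array; the key/value types are
   hypothetical stand-ins for the (thread, R/W, size) triples and RCEC
   pointers that the real accs[] binds:

      #include <stdio.h>

      // A hit swaps the entry one slot towards the front, a miss shifts
      // everything down and installs the new binding at slot 0, dropping
      // whatever fell off the end.  Key 0 marks an unused slot.
      #define N_ACCS 5

      typedef struct { unsigned long key; unsigned long val; } Binding;

      static void mtf_bind ( Binding accs[N_ACCS],
                             unsigned long key, unsigned long val )
      {
         int i;
         for (i = 0; i < N_ACCS; i++)
            if (accs[i].key == key) break;
         if (i < N_ACCS) {
            if (i > 0) {                      // hit: move one step forward
               Binding tmp = accs[i-1];
               accs[i-1] = accs[i];
               accs[i] = tmp;
               i--;
            }
            accs[i].val = val;                // refresh the stored value
         } else {
            for (i = N_ACCS-1; i >= 1; i--)   // miss: shift down; the last
               accs[i] = accs[i-1];           // entry is discarded
            accs[0].key = key;
            accs[0].val = val;
         }
      }

      int main ( void )
      {
         Binding accs[N_ACCS] = {{0,0}};
         mtf_bind(accs, 7, 100);
         mtf_bind(accs, 9, 200);
         mtf_bind(accs, 7, 300);              // hit: key 7 moves forward again
         printf("front key = %lu\n", accs[0].key);
         return 0;
      }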
+*/ + + +static UWord stats__ctxt_rcdec1 = 0; +static UWord stats__ctxt_rcdec2 = 0; +static UWord stats__ctxt_rcdec3 = 0; +static UWord stats__ctxt_rcdec_calls = 0; +static UWord stats__ctxt_rcdec_discards = 0; +static UWord stats__ctxt_rcdec1_eq = 0; + +static UWord stats__ctxt_tab_curr = 0; +static UWord stats__ctxt_tab_max = 0; + +static UWord stats__ctxt_tab_qs = 0; +static UWord stats__ctxt_tab_cmps = 0; + + +/////////////////////////////////////////////////////// +//// Part (1): An OSet of RCECs +/// + +#define N_FRAMES 8 + +// (UInt) `echo "Reference Counted Execution Context" | md5sum` +#define RCEC_MAGIC 0xab88abb2UL + +//#define N_RCEC_TAB 98317 /* prime */ +#define N_RCEC_TAB 196613 /* prime */ + +typedef + struct _RCEC { + UWord magic; /* sanity check only */ + struct _RCEC* next; + UWord rc; + UWord rcX; /* used for crosschecking */ + UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */ + } + RCEC; + +static RCEC** contextTab = NULL; /* hash table of RCEC*s */ + + +/* Gives an arbitrary total order on RCEC .frames fields */ +static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) { + Word i; + tl_assert(ec1 && ec1->magic == RCEC_MAGIC); + tl_assert(ec2 && ec2->magic == RCEC_MAGIC); + if (ec1->frames[0] < ec2->frames[0]) return -1; + if (ec1->frames[0] > ec2->frames[0]) return 1; + for (i = 1; i < 1 + N_FRAMES; i++) { + if (ec1->frames[i] < ec2->frames[i]) return -1; + if (ec1->frames[i] > ec2->frames[i]) return 1; + } + return 0; +} + + +/* Dec the ref of this RCEC. */ +static void ctxt__rcdec ( RCEC* ec ) +{ + stats__ctxt_rcdec_calls++; + tl_assert(ec && ec->magic == RCEC_MAGIC); + tl_assert(ec->rc > 0); + ec->rc--; +} + +static void ctxt__rcinc ( RCEC* ec ) +{ + tl_assert(ec && ec->magic == RCEC_MAGIC); + ec->rc++; +} + + +//////////// BEGIN RCEC group allocator +static GroupAlloc rcec_group_allocator; + +static RCEC* alloc_RCEC ( void ) { + return gal_Alloc ( &rcec_group_allocator ); +} + +static void free_RCEC ( RCEC* rcec ) { + tl_assert(rcec->magic == RCEC_MAGIC); + gal_Free( &rcec_group_allocator, rcec ); +} +//////////// END OldRef group allocator + + +/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and + move it one step closer the the front of the list, so as to make + subsequent searches for it cheaper. */ +static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec ) +{ + RCEC *ec0, *ec1, *ec2; + if (ec == *headp) + tl_assert(0); /* already at head of list */ + tl_assert(ec != NULL); + ec0 = *headp; + ec1 = NULL; + ec2 = NULL; + while (True) { + if (ec0 == NULL || ec0 == ec) break; + ec2 = ec1; + ec1 = ec0; + ec0 = ec0->next; + } + tl_assert(ec0 == ec); + if (ec0 != NULL && ec1 != NULL && ec2 != NULL) { + RCEC* tmp; + /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's + predecessor. Swap ec0 and ec1, that is, move ec0 one step + closer to the start of the list. */ + tl_assert(ec2->next == ec1); + tl_assert(ec1->next == ec0); + tmp = ec0->next; + ec2->next = ec0; + ec0->next = ec1; + ec1->next = tmp; + } + else + if (ec0 != NULL && ec1 != NULL && ec2 == NULL) { + /* it's second in the list. */ + tl_assert(*headp == ec1); + tl_assert(ec1->next == ec0); + ec1->next = ec0->next; + ec0->next = ec1; + *headp = ec0; + } +} + + +/* Find the given RCEC in the tree, and return a pointer to it. Or, + if not present, add the given one to the tree (by making a copy of + it, so the caller can immediately deallocate the original) and + return a pointer to the copy. 
The caller can safely have 'example' + on its stack, since we will always return a pointer to a copy of + it, not to the original. Note that the inserted node will have .rc + of zero and so the caller must immediatly increment it. */ +__attribute__((noinline)) +static RCEC* ctxt__find_or_add ( RCEC* example ) +{ + UWord hent; + RCEC* copy; + tl_assert(example && example->magic == RCEC_MAGIC); + tl_assert(example->rc == 0); + + /* Search the hash table to see if we already have it. */ + stats__ctxt_tab_qs++; + hent = example->frames[0] % N_RCEC_TAB; + copy = contextTab[hent]; + while (1) { + if (!copy) break; + tl_assert(copy->magic == RCEC_MAGIC); + stats__ctxt_tab_cmps++; + if (0 == RCEC__cmp_by_frames(copy, example)) break; + copy = copy->next; + } + + if (copy) { + tl_assert(copy != example); + /* optimisation: if it's not at the head of its list, move 1 + step fwds, to make future searches cheaper */ + if (copy != contextTab[hent]) { + move_RCEC_one_step_forward( &contextTab[hent], copy ); + } + } else { + copy = alloc_RCEC(); + tl_assert(copy != example); + *copy = *example; + copy->next = contextTab[hent]; + contextTab[hent] = copy; + stats__ctxt_tab_curr++; + if (stats__ctxt_tab_curr > stats__ctxt_tab_max) + stats__ctxt_tab_max = stats__ctxt_tab_curr; + } + return copy; +} + +static inline UWord ROLW ( UWord w, Int n ) +{ + Int bpw = 8 * sizeof(UWord); + w = (w << n) | (w >> (bpw-n)); + return w; +} + +__attribute__((noinline)) +static RCEC* get_RCEC ( Thr* thr ) +{ + UWord hash, i; + RCEC example; + example.magic = RCEC_MAGIC; + example.rc = 0; + example.rcX = 0; + main_get_stacktrace( thr, &example.frames[1], N_FRAMES ); + hash = 0; + for (i = 1; i < 1 + N_FRAMES; i++) { + hash ^= example.frames[i]; + hash = ROLW(hash, 19); + } + example.frames[0] = hash; + return ctxt__find_or_add( &example ); +} + +/////////////////////////////////////////////////////// +//// Part (2): +/// A SparseWA guest-addr -> OldRef, that refers to (1) +/// + +// (UInt) `echo "Old Reference Information" | md5sum` +#define OldRef_MAGIC 0x30b1f075UL + +/* Records an access: a thread and a context. The size + (1,2,4,8) and read-or-writeness are also encoded as + follows: bottom bit of .thr is 1 if write, 0 if read + bottom 2 bits of .rcec are encode size: + 00 = 1, 01 = 2, 10 = 4, 11 = 8 +*/ +typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC; + +#define N_OLDREF_ACCS 5 + +typedef + struct { + UWord magic; /* sanity check only */ + UWord gen; /* when most recently accessed */ + /* or free list when not in use */ + /* unused slots in this array have .thr == NULL */ + Thr_n_RCEC accs[N_OLDREF_ACCS]; + } + OldRef; + + +//////////// BEGIN OldRef group allocator +static GroupAlloc oldref_group_allocator; + +static OldRef* alloc_OldRef ( void ) { + return gal_Alloc ( &oldref_group_allocator ); +} + +static void free_OldRef ( OldRef* r ) { + tl_assert(r->magic == OldRef_MAGIC); + gal_Free( &oldref_group_allocator, r ); +} +//////////// END OldRef group allocator + + +static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */ +static UWord oldrefGen = 0; /* current LRU generation # */ +static UWord oldrefTreeN = 0; /* # elems in oldrefTree */ +static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */ + +inline static void* ptr_or_UWord ( void* p, UWord w ) { + return (void*)( ((UWord)p) | ((UWord)w) ); +} +inline static void* ptr_and_UWord ( void* p, UWord w ) { + return (void*)( ((UWord)p) & ((UWord)w) ); +} + +inline static UInt min_UInt ( UInt a, UInt b ) { + return a < b ? 
a : b; +} + +/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the + first interval is lower, 1 if the first interval is higher, and 0 + if there is any overlap. Redundant paranoia with casting is there + following what looked distinctly like a bug in gcc-4.1.2, in which + some of the comparisons were done signedly instead of + unsignedly. */ +/* Copied from exp-ptrcheck/sg_main.c */ +static Word cmp_nonempty_intervals ( Addr a1, SizeT n1, + Addr a2, SizeT n2 ) { + UWord a1w = (UWord)a1; + UWord n1w = (UWord)n1; + UWord a2w = (UWord)a2; + UWord n2w = (UWord)n2; + tl_assert(n1w > 0 && n2w > 0); + if (a1w + n1w <= a2w) return -1L; + if (a2w + n2w <= a1w) return 1L; + return 0; +} + +static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr ) +{ + OldRef* ref; + RCEC* rcec; + Word i, j; + UWord keyW, valW; + Bool b; + + rcec = get_RCEC( thr ); + ctxt__rcinc(rcec); + + /* encode the size and writeness of the transaction in the bottom + two bits of thr and rcec. */ + thr = ptr_or_UWord(thr, isW ? 1 : 0); + switch (szB) { + /* This doesn't look particularly branch-predictor friendly. */ + case 1: rcec = ptr_or_UWord(rcec, 0); break; + case 2: rcec = ptr_or_UWord(rcec, 1); break; + case 4: rcec = ptr_or_UWord(rcec, 2); break; + case 8: rcec = ptr_or_UWord(rcec, 3); break; + default: tl_assert(0); + } + + /* Look in the map to see if we already have this. */ + b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a ); + + if (b) { + + /* We already have a record for this address. We now need to + see if we have a stack trace pertaining to this (thread, R/W, + size) triple. */ + tl_assert(keyW == a); + ref = (OldRef*)valW; + tl_assert(ref->magic == OldRef_MAGIC); + + tl_assert(thr); + for (i = 0; i < N_OLDREF_ACCS; i++) { + if (ref->accs[i].thr != thr) + continue; + /* since .thr encodes both the accessing thread and the + read/writeness, we know now that at least those features + of the access match this entry. So we just need to check + the size indication. Do this by inspecting the lowest 2 bits of + .rcec, which contain the encoded size info. */ + if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3)) + continue; + /* else we have a match, so stop looking. */ + break; + } + + if (i < N_OLDREF_ACCS) { + /* thread 'thr' has an entry at index 'i'. Update it. */ + if (i > 0) { + Thr_n_RCEC tmp = ref->accs[i-1]; + ref->accs[i-1] = ref->accs[i]; + ref->accs[i] = tmp; + i--; + } + if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++; + stats__ctxt_rcdec1++; + ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) ); + ref->accs[i].rcec = rcec; + tl_assert(ref->accs[i].thr == thr); + } else { + /* No entry for this (thread, R/W, size) triple. Shuffle all + of them down one slot, and put the new entry at the start + of the array. */ + if (ref->accs[N_OLDREF_ACCS-1].thr) { + /* the last slot is in use. We must dec the rc on the + associated rcec. */ + tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec); + stats__ctxt_rcdec2++; + if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF)) + VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2); + ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) ); + } else { + tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec); + } + for (j = N_OLDREF_ACCS-1; j >= 1; j--) + ref->accs[j] = ref->accs[j-1]; + ref->accs[0].thr = thr; + ref->accs[0].rcec = rcec; + /* thr==NULL is used to signify an empty slot, so we can't + add a NULL thr. 
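   The packing used above depends only on Thr* and RCEC* being at least
   4-byte aligned; a self-contained sketch of the same low-bit tagging
   trick, with hypothetical helper names, is:

      #include <assert.h>
      #include <stdint.h>
      #include <stdio.h>

      // Bits 0..1 of a suitably aligned pointer are always zero, so they
      // can carry a read/write flag or a 2-bit size code, as done above
      // with ptr_or_UWord and recovered with ptr_and_UWord.
      static void* tag_ptr   ( void* p, uintptr_t bits ) { return (void*)((uintptr_t)p | bits); }
      static void* untag_ptr ( void* p ) { return (void*)((uintptr_t)p & ~(uintptr_t)3); }
      static uintptr_t tag_of ( void* p ) { return (uintptr_t)p & 3; }

      int main ( void )
      {
         static uintptr_t obj;               // word-aligned, so bits 0..1 are free
         void* tagged = tag_ptr(&obj, 1);    // e.g. bit 0 = "is a write"
         assert(untag_ptr(tagged) == (void*)&obj);
         assert(tag_of(tagged) == 1);
         printf("ok\n");
         return 0;
      }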
*/ + tl_assert(ptr_and_UWord(thr, ~3) != 0); + } + + ref->gen = oldrefGen; + + } else { + + /* We don't have a record for this address. Create a new one. */ + if (oldrefTreeN >= oldrefGenIncAt) { + oldrefGen++; + oldrefGenIncAt = oldrefTreeN + 50000; + if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n", + oldrefGen, oldrefTreeN ); + } + + ref = alloc_OldRef(); + ref->magic = OldRef_MAGIC; + ref->gen = oldrefGen; + ref->accs[0].rcec = rcec; + ref->accs[0].thr = thr; + /* thr==NULL is used to signify an empty slot, so we can't add a + NULL thr. */ + tl_assert(ptr_and_UWord(thr, ~3) != 0); + for (j = 1; j < N_OLDREF_ACCS; j++) { + ref->accs[j].thr = NULL; + ref->accs[j].rcec = NULL; + } + VG_(addToSWA)( oldrefTree, a, (UWord)ref ); + oldrefTreeN++; + + } +} + + +Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC, + /*OUT*/Thr** resThr, + /*OUT*/SizeT* resSzB, + /*OUT*/Bool* resIsW, + Thr* thr, Addr a, SizeT szB, Bool isW ) +{ + Word i, j; + OldRef* ref; + UWord keyW, valW; + Bool b; + + Thr* cand_thr; + RCEC* cand_rcec; + Bool cand_isW; + SizeT cand_szB; + Addr cand_a; + + Addr toCheck[15]; + Int nToCheck = 0; + + tl_assert(thr); + tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + + toCheck[nToCheck++] = a; + for (i = -7; i < (Word)szB; i++) { + if (i != 0) + toCheck[nToCheck++] = a + i; + } + tl_assert(nToCheck <= 15); + + /* Now see if we can find a suitable matching event for + any of the addresses in toCheck[0 .. nToCheck-1]. */ + for (j = 0; j < nToCheck; j++) { + + cand_a = toCheck[j]; + // VG_(printf)("test %ld %p\n", j, cand_a); + + b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a ); + if (!b) + continue; + + ref = (OldRef*)valW; + tl_assert(keyW == cand_a); + tl_assert(ref->magic == OldRef_MAGIC); + tl_assert(ref->accs[0].thr); /* first slot must always be used */ + + cand_thr = NULL; + cand_rcec = NULL; + cand_isW = False; + cand_szB = 0; + + for (i = 0; i < N_OLDREF_ACCS; i++) { + Thr_n_RCEC* cand = &ref->accs[i]; + cand_thr = ptr_and_UWord(cand->thr, ~3); + cand_rcec = ptr_and_UWord(cand->rcec, ~3); + /* Decode the writeness from the bottom bit of .thr. */ + cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1); + /* Decode the size from the bottom two bits of .rcec. */ + switch ((UWord)ptr_and_UWord(cand->rcec, 3)) { + case 0: cand_szB = 1; break; + case 1: cand_szB = 2; break; + case 2: cand_szB = 4; break; + case 3: cand_szB = 8; break; + default: tl_assert(0); + } + + if (cand_thr == NULL) + /* This slot isn't in use. Ignore it. */ + continue; + + if (cand_thr == thr) + /* This is an access by the same thread, but we're only + interested in accesses from other threads. Ignore. */ + continue; + + if ((!cand_isW) && (!isW)) + /* We don't want to report a read racing against another + read; that's stupid. So in this case move on. */ + continue; + + if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0) + /* No overlap with the access we're asking about. Ignore. */ + continue; + + /* We have a match. Stop searching. 
*/ + break; + } + + tl_assert(i >= 0 && i <= N_OLDREF_ACCS); + + if (i < N_OLDREF_ACCS) { + /* return with success */ + tl_assert(cand_thr); + tl_assert(cand_rcec); + tl_assert(cand_rcec->magic == RCEC_MAGIC); + tl_assert(cand_szB >= 1); + *resEC = VG_(make_ExeContext_from_StackTrace)( + &cand_rcec->frames[1], + min_UInt(N_FRAMES, VG_(clo_backtrace_size)) + ); + *resThr = cand_thr; + *resSzB = cand_szB; + *resIsW = cand_isW; + return True; + } + + /* consider next address in toCheck[] */ + } /* for (j = 0; j < nToCheck; j++) */ + + /* really didn't find anything. */ + return False; +} + +static void event_map_init ( void ) +{ + Word i; + + /* Context (RCEC) group allocator */ + init_GroupAlloc ( &rcec_group_allocator, + sizeof(RCEC), + 1000 /* RCECs per group */, + HG_(zalloc), + "libhb.event_map_init.1 (RCEC groups)", + HG_(free) ); + + /* Context table */ + tl_assert(!contextTab); + contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)", + N_RCEC_TAB * sizeof(RCEC*) ); + tl_assert(contextTab); + for (i = 0; i < N_RCEC_TAB; i++) + contextTab[i] = NULL; + + /* Oldref group allocator */ + init_GroupAlloc ( &oldref_group_allocator, + sizeof(OldRef), + 1000 /* OldRefs per group */, + HG_(zalloc), + "libhb.event_map_init.3 (OldRef groups)", + HG_(free) ); + + /* Oldref tree */ + tl_assert(!oldrefTree); + oldrefTree = VG_(newSWA)( + HG_(zalloc), + "libhb.event_map_init.4 (oldref tree)", + HG_(free) + ); + tl_assert(oldrefTree); + + oldrefGen = 0; + oldrefGenIncAt = 0; + oldrefTreeN = 0; +} + +static void event_map__check_reference_counts ( Bool before ) +{ + RCEC* rcec; + OldRef* oldref; + Word i; + UWord nEnts = 0; + UWord keyW, valW; + + /* Set the 'check' reference counts to zero. Also, optionally + check that the real reference counts are non-zero. We allow + these to fall to zero before a GC, but the GC must get rid of + all those that are zero, hence none should be zero after a + GC. */ + for (i = 0; i < N_RCEC_TAB; i++) { + for (rcec = contextTab[i]; rcec; rcec = rcec->next) { + nEnts++; + tl_assert(rcec); + tl_assert(rcec->magic == RCEC_MAGIC); + if (!before) + tl_assert(rcec->rc > 0); + rcec->rcX = 0; + } + } + + /* check that the stats are sane */ + tl_assert(nEnts == stats__ctxt_tab_curr); + tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max); + + /* visit all the referencing points, inc check ref counts */ + VG_(initIterSWA)( oldrefTree ); + while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { + oldref = (OldRef*)valW; + tl_assert(oldref->magic == OldRef_MAGIC); + for (i = 0; i < N_OLDREF_ACCS; i++) { + Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3); + RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3); + if (aThr) { + tl_assert(aRef); + tl_assert(aRef->magic == RCEC_MAGIC); + aRef->rcX++; + } else { + tl_assert(!aRef); + } + } + } + + /* compare check ref counts with actual */ + for (i = 0; i < N_RCEC_TAB; i++) { + for (rcec = contextTab[i]; rcec; rcec = rcec->next) { + tl_assert(rcec->rc == rcec->rcX); + } + } +} + +__attribute__((noinline)) +static void event_map_maybe_GC ( void ) +{ + OldRef* oldref; + UWord keyW, valW, retained, maxGen; + XArray* refs2del; + Word i, j, n2del; + + UWord* genMap = NULL; + UWord genMap_min = 0; + UWord genMap_size = 0; + + if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size))) + return; + + if (0) + VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN); + + /* Check for sane command line params. Limit values must match + those in hg_process_cmd_line_option. 
*/ + tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 ); + tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 ); + + /* Check our counting is sane (expensive) */ + if (CHECK_CEM) + tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree )); + + /* Check the reference counts (expensive) */ + if (CHECK_CEM) + event_map__check_reference_counts( True/*before*/ ); + + /* Compute the distribution of generation values in the ref tree. + There are likely only to be a few different generation numbers + in the whole tree, but we don't know what they are. Hence use a + dynamically resized array of counters. The array is genMap[0 + .. genMap_size-1], where genMap[0] is the count for the + generation number genMap_min, genMap[1] is the count for + genMap_min+1, etc. If a new number is seen outside the range + [genMap_min .. genMap_min + genMap_size - 1] then the array is + copied into a larger array, and genMap_min and genMap_size are + adjusted accordingly. */ + + /* genMap :: generation-number -> count-of-nodes-with-that-number */ + + VG_(initIterSWA)( oldrefTree ); + while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { + + UWord ea, key; + oldref = (OldRef*)valW; + key = oldref->gen; + + /* BEGIN find 'ea', which is the index in genMap holding the + count for generation number 'key'. */ + if (UNLIKELY(genMap == NULL)) { + /* deal with the first key to be seen, so that the following + cases don't need to handle the complexity of a NULL count + array. */ + genMap_min = key; + genMap_size = 1; + genMap = HG_(zalloc)( "libhb.emmG.1a", + genMap_size * sizeof(UWord) ); + ea = 0; + if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n", + key, genMap_min, genMap_min+genMap_size- 1 ); + } + else + if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) { + /* this is the expected (almost-always-happens) case: 'key' + is already mapped in the array. */ + ea = key - genMap_min; + } + else + if (key < genMap_min) { + /* 'key' appears before the start of the current array. + Extend the current array by allocating a larger one and + copying the current one to the upper end of it. */ + Word more; + UWord* map2; + more = genMap_min - key; + tl_assert(more > 0); + map2 = HG_(zalloc)( "libhb.emmG.1b", + (genMap_size + more) * sizeof(UWord) ); + VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) ); + HG_(free)( genMap ); + genMap = map2; + genMap_size += more; + genMap_min -= more; + ea = 0; + tl_assert(genMap_min == key); + if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n", + key, genMap_min, genMap_min+genMap_size- 1 ); + } + else { + /* 'key' appears after the end of the current array. Extend + the current array by allocating a larger one and copying + the current one to the lower end of it. */ + Word more; + UWord* map2; + tl_assert(key >= genMap_min + genMap_size); + more = key - (genMap_min + genMap_size) + 1; + tl_assert(more > 0); + map2 = HG_(zalloc)( "libhb.emmG.1c", + (genMap_size + more) * sizeof(UWord) ); + VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) ); + HG_(free)( genMap ); + genMap = map2; + genMap_size += more; + ea = genMap_size - 1;; + tl_assert(genMap_min + genMap_size - 1 == key); + if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n", + key, genMap_min, genMap_min+genMap_size- 1 ); + } + /* END find 'ea' from 'key' */ + + tl_assert(ea >= 0 && ea < genMap_size); + /* and the whole point of this elaborate computation of 'ea' is .. 
*/ + genMap[ea]++; + } + + tl_assert(genMap); + tl_assert(genMap_size > 0); + + /* Sanity check what we just computed */ + { UWord sum = 0; + for (i = 0; i < genMap_size; i++) { + if (0) VG_(printf)(" xxx: gen %ld has %lu\n", + i + genMap_min, genMap[i] ); + sum += genMap[i]; + } + tl_assert(sum == oldrefTreeN); + } + + /* Figure out how many generations to throw away */ + retained = oldrefTreeN; + maxGen = 0; + + for (i = 0; i < genMap_size; i++) { + keyW = i + genMap_min; + valW = genMap[i]; + tl_assert(keyW > 0); /* can't allow a generation # 0 */ + if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW ); + tl_assert(keyW >= maxGen); + tl_assert(retained >= valW); + if (retained - valW + > (UWord)(HG_(clo_conflict_cache_size) + * EVENT_MAP_GC_DISCARD_FRACTION)) { + retained -= valW; + maxGen = keyW; + } else { + break; + } + } + + HG_(free)(genMap); + + tl_assert(retained >= 0 && retained <= oldrefTreeN); + + /* Now make up a big list of the oldrefTree entries we want to + delete. We can't simultaneously traverse the tree and delete + stuff from it, so first we need to copy them off somewhere + else. (sigh) */ + refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2", + HG_(free), sizeof(Addr) ); + + if (retained < oldrefTreeN) { + + /* This is the normal (expected) case. We discard any ref whose + generation number <= maxGen. */ + VG_(initIterSWA)( oldrefTree ); + while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { + oldref = (OldRef*)valW; + tl_assert(oldref->magic == OldRef_MAGIC); + if (oldref->gen <= maxGen) { + VG_(addToXA)( refs2del, &keyW ); + } + } + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_DebugMsg, + "libhb: EvM GC: delete generations %lu and below, " + "retaining %lu entries", + maxGen, retained ); + } + + } else { + + static UInt rand_seed = 0; /* leave as static */ + + /* Degenerate case: there's only one generation in the entire + tree, so we need to have some other way of deciding which + refs to throw away. Just throw out half of them randomly. 
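   The coin flip used below keeps each entry with probability of roughly
   one half by testing the low 12 bits of a pseudo-random number; a
   self-contained sketch of that test, using the standard rand() in
   place of VG_(random), is:

      #include <stdio.h>
      #include <stdlib.h>

      // Of the 4096 possible values of the low 12 bits, exactly 2048
      // select an entry for deletion, giving a ~50% discard rate.
      int main ( void )
      {
         int i, deleted = 0, total = 100000;
         for (i = 0; i < total; i++) {
            unsigned n = (unsigned)rand();
            if ((n & 0xFFF) < 0x800)          // low 12 bits below 0x800
               deleted++;
         }
         printf("deleted %d of %d (~half)\n", deleted, total);
         return 0;
      }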
*/ + tl_assert(retained == oldrefTreeN); + VG_(initIterSWA)( oldrefTree ); + while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) { + UInt n; + oldref = (OldRef*)valW; + tl_assert(oldref->magic == OldRef_MAGIC); + n = VG_(random)( &rand_seed ); + if ((n & 0xFFF) < 0x800) { + VG_(addToXA)( refs2del, &keyW ); + retained--; + } + } + if (VG_(clo_verbosity) > 1) { + VG_(message)(Vg_DebugMsg, + "libhb: EvM GC: randomly delete half the entries, " + "retaining %lu entries", + retained ); + } + + } + + n2del = VG_(sizeXA)( refs2del ); + tl_assert(n2del == (Word)(oldrefTreeN - retained)); + + if (0) VG_(printf)("%s","deleting entries\n"); + for (i = 0; i < n2del; i++) { + Bool b; + Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i ); + b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del ); + tl_assert(b); + tl_assert(keyW == ga2del); + oldref = (OldRef*)valW; + for (j = 0; j < N_OLDREF_ACCS; j++) { + Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3); + RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3); + if (aRef) { + tl_assert(aThr); + stats__ctxt_rcdec3++; + ctxt__rcdec( aRef ); + } else { + tl_assert(!aThr); + } + } + + free_OldRef( oldref ); + } + + VG_(deleteXA)( refs2del ); + + tl_assert( VG_(sizeSWA)( oldrefTree ) == retained ); + + oldrefTreeN = retained; + oldrefGenIncAt = oldrefTreeN; /* start new gen right away */ + + /* Throw away all RCECs with zero reference counts */ + for (i = 0; i < N_RCEC_TAB; i++) { + RCEC** pp = &contextTab[i]; + RCEC* p = *pp; + while (p) { + if (p->rc == 0) { + *pp = p->next; + free_RCEC(p); + p = *pp; + tl_assert(stats__ctxt_tab_curr > 0); + stats__ctxt_tab_curr--; + } else { + pp = &p->next; + p = p->next; + } + } + } + + /* Check the reference counts (expensive) */ + if (CHECK_CEM) + event_map__check_reference_counts( False/*after*/ ); + + //if (0) + //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n", + // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree)); + +} + + +///////////////////////////////////////////////////////// +// // +// Core MSM // +// // +///////////////////////////////////////////////////////// + +/* Logic in msm_read/msm_write updated/verified after re-analysis, + 19 Nov 08. */ + +/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When + nonzero, the effect is that when a race is detected for a location, + that location is put into a special 'error' state and no further + checking of it is done until it returns to a 'normal' state, which + requires it to be deallocated and reallocated. + + This is a bad idea, because of the interaction with suppressions. + Suppose there is a race on the location, but the error is + suppressed. The location now is marked as in-error. Now any + subsequent race -- including ones we want to see -- will never be + detected until the location is deallocated and reallocated. + + Hence set MSM_RACE2ERR to zero. This causes raced-on locations to + remain in the normal 'C' (constrained) state, but places on them + the constraint that the next accesses happen-after both the + existing constraint and the relevant vector clock of the thread + doing the racing access. +*/ +#define MSM_RACE2ERR 0 + +static ULong stats__msm_read = 0; +static ULong stats__msm_read_change = 0; +static ULong stats__msm_write = 0; +static ULong stats__msm_write_change = 0; + +__attribute__((noinline)) +static void record_race_info ( Thr* acc_thr, + Addr acc_addr, SizeT szB, Bool isWrite ) +{ + /* Call here to report a race. We just hand it onwards to + HG_(record_error_Race). 
If that in turn discovers that the + error is going to be collected, then that queries the + conflicting-event map. The alternative would be to query it + right here. But that causes a lot of pointless queries for + errors which will shortly be discarded as duplicates, and can + become a performance overhead; so we defer the query until we + know the error is not a duplicate. */ + tl_assert(acc_thr->opaque); + HG_(record_error_Race)( acc_thr->opaque, acc_addr, + szB, isWrite, NULL/*mb_lastlock*/ ); +} + +static Bool is_sane_SVal_C ( SVal sv ) { + POrd ord; + if (!SVal__isC(sv)) return True; + ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) ); + if (ord == POrd_EQ || ord == POrd_LT) return True; + return False; +} + + +/* Compute new state following a read */ +static inline SVal msm_read ( SVal svOld, + /* The following are only needed for + creating error reports. */ + Thr* acc_thr, + Addr acc_addr, SizeT szB ) +{ + SVal svNew = SVal_INVALID; + stats__msm_read++; + + /* Redundant sanity check on the constraints */ + if (CHECK_MSM) { + tl_assert(is_sane_SVal_C(svOld)); + } + + if (SVal__isC(svOld)) { + POrd ord; + VtsID tviR = acc_thr->viR; + VtsID tviW = acc_thr->viW; + VtsID rmini = SVal__unC_Rmin(svOld); + VtsID wmini = SVal__unC_Wmin(svOld); + + ord = VtsID__getOrdering(rmini,tviR); + if (ord == POrd_EQ || ord == POrd_LT) { + /* no race */ + /* Note: RWLOCK subtlety: use tviW, not tviR */ + svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) ); + goto out; + } else { + /* assert on sanity of constraints. */ + POrd ordxx = VtsID__getOrdering(rmini,wmini); + tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT); + svNew = MSM_RACE2ERR + ? SVal__mkE() + /* see comments on corresponding fragment in + msm_write for explanation. */ + /* aggressive setting: */ + /* + : SVal__mkC( VtsID__join2(wmini,tviR), + VtsID__join2(wmini,tviW) ); + */ + /* "consistent" setting: */ + : SVal__mkC( VtsID__join2(rmini,tviR), + VtsID__join2(wmini,tviW) ); + record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ ); + goto out; + } + } + if (SVal__isA(svOld)) { + /* reading no-access memory (sigh); leave unchanged */ + /* check for no pollution */ + tl_assert(svOld == SVal_NOACCESS); + svNew = SVal_NOACCESS; + goto out; + } + if (SVal__isE(svOld)) { + /* no race, location is already "in error" */ + svNew = SVal__mkE(); + goto out; + } + VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld); + tl_assert(0); + + out: + if (CHECK_MSM) { + tl_assert(is_sane_SVal_C(svNew)); + } + tl_assert(svNew != SVal_INVALID); + if (svNew != svOld && HG_(clo_show_conflicts)) { + if (SVal__isC(svOld) && SVal__isC(svNew)) { + event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr ); + stats__msm_read_change++; + } + } + return svNew; +} + + +/* Compute new state following a write */ +static inline SVal msm_write ( SVal svOld, + /* The following are only needed for + creating error reports. */ + Thr* acc_thr, + Addr acc_addr, SizeT szB ) +{ + SVal svNew = SVal_INVALID; + stats__msm_write++; + + /* Redundant sanity check on the constraints */ + if (CHECK_MSM) { + tl_assert(is_sane_SVal_C(svOld)); + } + + if (SVal__isC(svOld)) { + POrd ord; + VtsID tviW = acc_thr->viW; + VtsID wmini = SVal__unC_Wmin(svOld); + + ord = VtsID__getOrdering(wmini,tviW); + if (ord == POrd_EQ || ord == POrd_LT) { + /* no race */ + svNew = SVal__mkC( tviW, tviW ); + goto out; + } else { + VtsID tviR = acc_thr->viR; + VtsID rmini = SVal__unC_Rmin(svOld); + /* assert on sanity of constraints. 
*/ + POrd ordxx = VtsID__getOrdering(rmini,wmini); + tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT); + svNew = MSM_RACE2ERR + ? SVal__mkE() + /* One possibility is, after a race is seen, to + set the location's constraints as aggressively + (as far ahead) as possible. However, that just + causes lots more races to be reported, which is + very confusing. Hence don't do this. */ + /* + : SVal__mkC( VtsID__join2(wmini,tviR), + VtsID__join2(wmini,tviW) ); + */ + /* instead, re-set the constraints in a way which + is consistent with (ie, as they would have been + computed anyway) had no race been detected. */ + : SVal__mkC( VtsID__join2(rmini,tviR), + VtsID__join2(wmini,tviW) ); + record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ ); + goto out; + } + } + if (SVal__isA(svOld)) { + /* writing no-access memory (sigh); leave unchanged */ + /* check for no pollution */ + tl_assert(svOld == SVal_NOACCESS); + svNew = SVal_NOACCESS; + goto out; + } + if (SVal__isE(svOld)) { + /* no race, location is already "in error" */ + svNew = SVal__mkE(); + goto out; + } + VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld); + tl_assert(0); + + out: + if (CHECK_MSM) { + tl_assert(is_sane_SVal_C(svNew)); + } + tl_assert(svNew != SVal_INVALID); + if (svNew != svOld && HG_(clo_show_conflicts)) { + if (SVal__isC(svOld) && SVal__isC(svNew)) { + event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr ); + stats__msm_write_change++; + } + } + return svNew; +} + + +///////////////////////////////////////////////////////// +// // +// Apply core MSM to specific memory locations // +// // +///////////////////////////////////////////////////////// + +/*------------- ZSM accesses: 8 bit apply ------------- */ + +void zsm_apply8___msm_read ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + SVal svOld, svNew; + UShort descr; + stats__cline_read8s++; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 .. 7 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_8(tree, toff, descr); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + svOld = cl->svals[cloff]; + svNew = msm_read( svOld, thr,a,1 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; +} + +void zsm_apply8___msm_write ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + SVal svOld, svNew; + UShort descr; + stats__cline_read8s++; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 .. 
7 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_8(tree, toff, descr); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + svOld = cl->svals[cloff]; + svNew = msm_write( svOld, thr,a,1 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; +} + +/*------------- ZSM accesses: 16 bit apply ------------- */ + +void zsm_apply16___msm_read ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + SVal svOld, svNew; + UShort descr; + stats__cline_read16s++; + if (UNLIKELY(!aligned16(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { + if (valid_value_is_below_me_16(descr, toff)) { + goto slowcase; + } else { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_16(tree, toff, descr); + } + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + svOld = cl->svals[cloff]; + svNew = msm_read( svOld, thr,a,2 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; + return; + slowcase: /* misaligned, or must go further down the tree */ + stats__cline_16to8splits++; + zsm_apply8___msm_read( thr, a + 0 ); + zsm_apply8___msm_read( thr, a + 1 ); +} + +void zsm_apply16___msm_write ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + SVal svOld, svNew; + UShort descr; + stats__cline_read16s++; + if (UNLIKELY(!aligned16(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { + if (valid_value_is_below_me_16(descr, toff)) { + goto slowcase; + } else { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_16(tree, toff, descr); + } + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + svOld = cl->svals[cloff]; + svNew = msm_write( svOld, thr,a,2 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; + return; + slowcase: /* misaligned, or must go further down the tree */ + stats__cline_16to8splits++; + zsm_apply8___msm_write( thr, a + 0 ); + zsm_apply8___msm_write( thr, a + 1 ); +} + +/*------------- ZSM accesses: 32 bit apply ------------- */ + +void zsm_apply32___msm_read ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + SVal svOld, svNew; + UShort descr; + if (UNLIKELY(!aligned32(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 or 4 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { + if (valid_value_is_above_me_32(descr, toff)) { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_32(tree, toff, descr); + } else { + goto slowcase; + } + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + svOld = cl->svals[cloff]; + svNew = msm_read( svOld, thr,a,4 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; + return; + slowcase: /* misaligned, or must go further down the tree */ + stats__cline_32to16splits++; + zsm_apply16___msm_read( thr, a + 0 ); + zsm_apply16___msm_read( thr, a + 2 ); +} + +void zsm_apply32___msm_write ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + SVal svOld, 
svNew; + UShort descr; + if (UNLIKELY(!aligned32(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 or 4 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { + if (valid_value_is_above_me_32(descr, toff)) { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_32(tree, toff, descr); + } else { + goto slowcase; + } + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + svOld = cl->svals[cloff]; + svNew = msm_write( svOld, thr,a,4 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; + return; + slowcase: /* misaligned, or must go further down the tree */ + stats__cline_32to16splits++; + zsm_apply16___msm_write( thr, a + 0 ); + zsm_apply16___msm_write( thr, a + 2 ); +} + +/*------------- ZSM accesses: 64 bit apply ------------- */ + +void zsm_apply64___msm_read ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno; + //UWord toff; + SVal svOld, svNew; + UShort descr; + stats__cline_read64s++; + if (UNLIKELY(!aligned64(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + //toff = get_tree_offset(a); /* == 0, unused */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & TREE_DESCR_64) )) { + goto slowcase; + } + svOld = cl->svals[cloff]; + svNew = msm_read( svOld, thr,a,8 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; + return; + slowcase: /* misaligned, or must go further down the tree */ + stats__cline_64to32splits++; + zsm_apply32___msm_read( thr, a + 0 ); + zsm_apply32___msm_read( thr, a + 4 ); +} + +void zsm_apply64___msm_write ( Thr* thr, Addr a ) { + CacheLine* cl; + UWord cloff, tno; + //UWord toff; + SVal svOld, svNew; + UShort descr; + stats__cline_read64s++; + if (UNLIKELY(!aligned64(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + //toff = get_tree_offset(a); /* == 0, unused */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & TREE_DESCR_64) )) { + goto slowcase; + } + svOld = cl->svals[cloff]; + svNew = msm_write( svOld, thr,a,8 ); + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; + return; + slowcase: /* misaligned, or must go further down the tree */ + stats__cline_64to32splits++; + zsm_apply32___msm_write( thr, a + 0 ); + zsm_apply32___msm_write( thr, a + 4 ); +} + +/*--------------- ZSM accesses: 8 bit write --------------- */ + +static +void zsm_write8 ( Addr a, SVal svNew ) { + CacheLine* cl; + UWord cloff, tno, toff; + UShort descr; + stats__cline_set8s++; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 .. 
7 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_8(tree, toff, descr); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff] = svNew; +} + +/*--------------- ZSM accesses: 16 bit write --------------- */ + +static +void zsm_write16 ( Addr a, SVal svNew ) { + CacheLine* cl; + UWord cloff, tno, toff; + UShort descr; + stats__cline_set16s++; + if (UNLIKELY(!aligned16(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) { + if (valid_value_is_below_me_16(descr, toff)) { + /* Writing at this level. Need to fix up 'descr'. */ + cl->descrs[tno] = pullup_descr_to_16(descr, toff); + /* At this point, the tree does not match cl->descr[tno] any + more. The assignments below will fix it up. */ + } else { + /* We can't indiscriminately write on the w16 node as in the + w64 case, as that might make the node inconsistent with + its parent. So first, pull down to this level. */ + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_16(tree, toff, descr); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } + } + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff + 0] = svNew; + cl->svals[cloff + 1] = SVal_INVALID; + return; + slowcase: /* misaligned */ + stats__cline_16to8splits++; + zsm_write8( a + 0, svNew ); + zsm_write8( a + 1, svNew ); +} + +/*--------------- ZSM accesses: 32 bit write --------------- */ + +static +void zsm_write32 ( Addr a, SVal svNew ) { + CacheLine* cl; + UWord cloff, tno, toff; + UShort descr; + stats__cline_set32s++; + if (UNLIKELY(!aligned32(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 or 4 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) { + if (valid_value_is_above_me_32(descr, toff)) { + /* We can't indiscriminately write on the w32 node as in the + w64 case, as that might make the node inconsistent with + its parent. So first, pull down to this level. */ + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_32(tree, toff, descr); + if (CHECK_ZSM) + tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */ + } else { + /* Writing at this level. Need to fix up 'descr'. */ + cl->descrs[tno] = pullup_descr_to_32(descr, toff); + /* At this point, the tree does not match cl->descr[tno] any + more. The assignments below will fix it up. 
*/ + } + } + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff + 0] = svNew; + cl->svals[cloff + 1] = SVal_INVALID; + cl->svals[cloff + 2] = SVal_INVALID; + cl->svals[cloff + 3] = SVal_INVALID; + return; + slowcase: /* misaligned */ + stats__cline_32to16splits++; + zsm_write16( a + 0, svNew ); + zsm_write16( a + 2, svNew ); +} + +/*--------------- ZSM accesses: 64 bit write --------------- */ + +static +void zsm_write64 ( Addr a, SVal svNew ) { + CacheLine* cl; + UWord cloff, tno; + //UWord toff; + stats__cline_set64s++; + if (UNLIKELY(!aligned64(a))) goto slowcase; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + //toff = get_tree_offset(a); /* == 0, unused */ + cl->descrs[tno] = TREE_DESCR_64; + tl_assert(svNew != SVal_INVALID); + cl->svals[cloff + 0] = svNew; + cl->svals[cloff + 1] = SVal_INVALID; + cl->svals[cloff + 2] = SVal_INVALID; + cl->svals[cloff + 3] = SVal_INVALID; + cl->svals[cloff + 4] = SVal_INVALID; + cl->svals[cloff + 5] = SVal_INVALID; + cl->svals[cloff + 6] = SVal_INVALID; + cl->svals[cloff + 7] = SVal_INVALID; + return; + slowcase: /* misaligned */ + stats__cline_64to32splits++; + zsm_write32( a + 0, svNew ); + zsm_write32( a + 4, svNew ); +} + +/*------------- ZSM accesses: 8 bit read/copy ------------- */ + +static +SVal zsm_read8 ( Addr a ) { + CacheLine* cl; + UWord cloff, tno, toff; + UShort descr; + stats__cline_get8s++; + cl = get_cacheline(a); + cloff = get_cacheline_offset(a); + tno = get_treeno(a); + toff = get_tree_offset(a); /* == 0 .. 7 */ + descr = cl->descrs[tno]; + if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) { + SVal* tree = &cl->svals[tno << 3]; + cl->descrs[tno] = pulldown_to_8(tree, toff, descr); + } + return cl->svals[cloff]; +} + +static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) { + SVal sv; + stats__cline_copy8s++; + sv = zsm_read8( src ); + zsm_write8( dst, sv ); +} + +/* ------------ Shadow memory range setting ops ------------ */ + +void zsm_apply_range___msm_read ( Thr* thr, + Addr a, SizeT len ) +{ + /* fast track a couple of common cases */ + if (len == 4 && aligned32(a)) { + zsm_apply32___msm_read( thr, a ); + return; + } + if (len == 8 && aligned64(a)) { + zsm_apply64___msm_read( thr, a ); + return; + } + + /* be completely general (but as efficient as possible) */ + if (len == 0) return; + + if (!aligned16(a) && len >= 1) { + zsm_apply8___msm_read( thr, a ); + a += 1; + len -= 1; + tl_assert(aligned16(a)); + } + if (len == 0) return; + + if (!aligned32(a) && len >= 2) { + zsm_apply16___msm_read( thr, a ); + a += 2; + len -= 2; + tl_assert(aligned32(a)); + } + if (len == 0) return; + + if (!aligned64(a) && len >= 4) { + zsm_apply32___msm_read( thr, a ); + a += 4; + len -= 4; + tl_assert(aligned64(a)); + } + if (len == 0) return; + + if (len >= 8) { + tl_assert(aligned64(a)); + while (len >= 8) { + zsm_apply64___msm_read( thr, a ); + a += 8; + len -= 8; + } + tl_assert(aligned64(a)); + } + if (len == 0) return; + + if (len >= 4) + tl_assert(aligned32(a)); + if (len >= 4) { + zsm_apply32___msm_read( thr, a ); + a += 4; + len -= 4; + } + if (len == 0) return; + + if (len >= 2) + tl_assert(aligned16(a)); + if (len >= 2) { + zsm_apply16___msm_read( thr, a ); + a += 2; + len -= 2; + } + if (len == 0) return; + + if (len >= 1) { + zsm_apply8___msm_read( thr, a ); + //a += 1; + len -= 1; + } + tl_assert(len == 0); +} + + + +void zsm_apply_range___msm_write ( Thr* thr, + Addr a, SizeT len ) +{ + /* fast track a couple of common cases */ + if (len == 4 && aligned32(a)) { + 
zsm_apply32___msm_write( thr, a ); + return; + } + if (len == 8 && aligned64(a)) { + zsm_apply64___msm_write( thr, a ); + return; + } + + /* be completely general (but as efficient as possible) */ + if (len == 0) return; + + if (!aligned16(a) && len >= 1) { + zsm_apply8___msm_write( thr, a ); + a += 1; + len -= 1; + tl_assert(aligned16(a)); + } + if (len == 0) return; + + if (!aligned32(a) && len >= 2) { + zsm_apply16___msm_write( thr, a ); + a += 2; + len -= 2; + tl_assert(aligned32(a)); + } + if (len == 0) return; + + if (!aligned64(a) && len >= 4) { + zsm_apply32___msm_write( thr, a ); + a += 4; + len -= 4; + tl_assert(aligned64(a)); + } + if (len == 0) return; + + if (len >= 8) { + tl_assert(aligned64(a)); + while (len >= 8) { + zsm_apply64___msm_write( thr, a ); + a += 8; + len -= 8; + } + tl_assert(aligned64(a)); + } + if (len == 0) return; + + if (len >= 4) + tl_assert(aligned32(a)); + if (len >= 4) { + zsm_apply32___msm_write( thr, a ); + a += 4; + len -= 4; + } + if (len == 0) return; + + if (len >= 2) + tl_assert(aligned16(a)); + if (len >= 2) { + zsm_apply16___msm_write( thr, a ); + a += 2; + len -= 2; + } + if (len == 0) return; + + if (len >= 1) { + zsm_apply8___msm_write( thr, a ); + //a += 1; + len -= 1; + } + tl_assert(len == 0); +} + + + + +/* Block-copy states (needed for implementing realloc()). */ + +static void zsm_copy_range ( Addr src, Addr dst, SizeT len ) +{ + SizeT i; + if (len == 0) + return; + + /* assert for non-overlappingness */ + tl_assert(src+len <= dst || dst+len <= src); + + /* To be simple, just copy byte by byte. But so as not to wreck + performance for later accesses to dst[0 .. len-1], normalise + destination lines as we finish with them, and also normalise the + line containing the first and last address. */ + for (i = 0; i < len; i++) { + Bool normalise + = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */ + || i == 0 /* first in range */ + || i == len-1; /* last in range */ + zsm_copy8( src+i, dst+i, normalise ); + } +} + + +/* For setting address ranges to a given value. Has considerable + sophistication so as to avoid generating large numbers of pointless + cache loads/writebacks for large ranges. */ + +/* Do small ranges in-cache, in the obvious way. 
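   The "obvious way" is the usual peel-to-alignment pattern; a
   self-contained sketch of the same control flow, with a hypothetical
   setN() standing in for zsm_write8/16/32/64, is:

      #include <stdio.h>

      // Handle an unaligned 1-, 2- and 4-byte prefix, stream 8-byte chunks
      // over the aligned middle, then finish with a 4-, 2-, 1-byte tail,
      // exactly the shape of zsm_set_range_SMALL and
      // zsm_apply_range___msm_* above.
      typedef unsigned long Addr;

      static void setN ( Addr a, int n ) { printf("set%d at %#lx\n", n, a); }

      static void set_range ( Addr a, unsigned long len )
      {
         if (len == 0) return;
         if ((a & 1) && len >= 1) { setN(a,1); a += 1; len -= 1; }
         if (len == 0) return;
         if ((a & 2) && len >= 2) { setN(a,2); a += 2; len -= 2; }
         if (len == 0) return;
         if ((a & 4) && len >= 4) { setN(a,4); a += 4; len -= 4; }
         while (len >= 8)         { setN(a,8); a += 8; len -= 8; }
         if (len >= 4)            { setN(a,4); a += 4; len -= 4; }
         if (len >= 2)            { setN(a,2); a += 2; len -= 2; }
         if (len >= 1)            { setN(a,1); a += 1; len -= 1; }
      }

      int main ( void )
      {
         set_range(0x1003, 14);   // 1-byte prefix, 4-byte step, one 8-byte
         return 0;                // chunk, 1-byte tail
      }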
*/ +static +void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew ) +{ + /* fast track a couple of common cases */ + if (len == 4 && aligned32(a)) { + zsm_write32( a, svNew ); + return; + } + if (len == 8 && aligned64(a)) { + zsm_write64( a, svNew ); + return; + } + + /* be completely general (but as efficient as possible) */ + if (len == 0) return; + + if (!aligned16(a) && len >= 1) { + zsm_write8( a, svNew ); + a += 1; + len -= 1; + tl_assert(aligned16(a)); + } + if (len == 0) return; + + if (!aligned32(a) && len >= 2) { + zsm_write16( a, svNew ); + a += 2; + len -= 2; + tl_assert(aligned32(a)); + } + if (len == 0) return; + + if (!aligned64(a) && len >= 4) { + zsm_write32( a, svNew ); + a += 4; + len -= 4; + tl_assert(aligned64(a)); + } + if (len == 0) return; + + if (len >= 8) { + tl_assert(aligned64(a)); + while (len >= 8) { + zsm_write64( a, svNew ); + a += 8; + len -= 8; + } + tl_assert(aligned64(a)); + } + if (len == 0) return; + + if (len >= 4) + tl_assert(aligned32(a)); + if (len >= 4) { + zsm_write32( a, svNew ); + a += 4; + len -= 4; + } + if (len == 0) return; + + if (len >= 2) + tl_assert(aligned16(a)); + if (len >= 2) { + zsm_write16( a, svNew ); + a += 2; + len -= 2; + } + if (len == 0) return; + + if (len >= 1) { + zsm_write8( a, svNew ); + //a += 1; + len -= 1; + } + tl_assert(len == 0); +} + + +/* If we're doing a small range, hand off to zsm_set_range_SMALL. But + for larger ranges, try to operate directly on the out-of-cache + representation, rather than dragging lines into the cache, + overwriting them, and forcing them out. This turns out to be an + important performance optimisation. */ + +static void zsm_set_range ( Addr a, SizeT len, SVal svNew ) +{ + tl_assert(svNew != SVal_INVALID); + stats__cache_make_New_arange += (ULong)len; + + if (0 && len > 500) + VG_(printf)("make New ( %#lx, %ld )\n", a, len ); + + if (0) { + static UWord n_New_in_cache = 0; + static UWord n_New_not_in_cache = 0; + /* tag is 'a' with the in-line offset masked out, + eg a[31]..a[4] 0000 */ + Addr tag = a & ~(N_LINE_ARANGE - 1); + UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); + if (LIKELY(tag == cache_shmem.tags0[wix])) { + n_New_in_cache++; + } else { + n_New_not_in_cache++; + } + if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000)) + VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n", + n_New_in_cache, n_New_not_in_cache ); + } + + if (LIKELY(len < 2 * N_LINE_ARANGE)) { + zsm_set_range_SMALL( a, len, svNew ); + } else { + Addr before_start = a; + Addr aligned_start = cacheline_ROUNDUP(a); + Addr after_start = cacheline_ROUNDDN(a + len); + UWord before_len = aligned_start - before_start; + UWord aligned_len = after_start - aligned_start; + UWord after_len = a + len - after_start; + tl_assert(before_start <= aligned_start); + tl_assert(aligned_start <= after_start); + tl_assert(before_len < N_LINE_ARANGE); + tl_assert(after_len < N_LINE_ARANGE); + tl_assert(get_cacheline_offset(aligned_start) == 0); + if (get_cacheline_offset(a) == 0) { + tl_assert(before_len == 0); + tl_assert(a == aligned_start); + } + if (get_cacheline_offset(a+len) == 0) { + tl_assert(after_len == 0); + tl_assert(after_start == a+len); + } + if (before_len > 0) { + zsm_set_range_SMALL( before_start, before_len, svNew ); + } + if (after_len > 0) { + zsm_set_range_SMALL( after_start, after_len, svNew ); + } + stats__cache_make_New_inZrep += (ULong)aligned_len; + + while (1) { + Addr tag; + UWord wix; + if (aligned_start >= after_start) + break; + tl_assert(get_cacheline_offset(aligned_start) == 0); + tag = 
aligned_start & ~(N_LINE_ARANGE - 1); + wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1); + if (tag == cache_shmem.tags0[wix]) { + UWord i; + for (i = 0; i < N_LINE_ARANGE / 8; i++) + zsm_write64( aligned_start + i * 8, svNew ); + } else { + UWord i; + Word zix; + SecMap* sm; + LineZ* lineZ; + /* This line is not in the cache. Do not force it in; instead + modify it in-place. */ + /* find the Z line to write in and rcdec it or the + associated F line. */ + find_Z_for_writing( &sm, &zix, tag ); + tl_assert(sm); + tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES); + lineZ = &sm->linesZ[zix]; + lineZ->dict[0] = svNew; + lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID; + for (i = 0; i < N_LINE_ARANGE/4; i++) + lineZ->ix2s[i] = 0; /* all refer to dict[0] */ + rcinc_LineZ(lineZ); + } + aligned_start += N_LINE_ARANGE; + aligned_len -= N_LINE_ARANGE; + } + tl_assert(aligned_start == after_start); + tl_assert(aligned_len == 0); + } +} + + +///////////////////////////////////////////////////////// +// // +// Synchronisation objects // +// // +///////////////////////////////////////////////////////// + +// (UInt) `echo "Synchronisation object" | md5sum` +#define SO_MAGIC 0x56b3c5b0U + +struct _SO { + VtsID viR; /* r-clock of sender */ + VtsID viW; /* w-clock of sender */ + UInt magic; +}; + +static SO* SO__Alloc ( void ) { + SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) ); + so->viR = VtsID_INVALID; + so->viW = VtsID_INVALID; + so->magic = SO_MAGIC; + return so; +} +static void SO__Dealloc ( SO* so ) { + tl_assert(so); + tl_assert(so->magic == SO_MAGIC); + if (so->viR == VtsID_INVALID) { + tl_assert(so->viW == VtsID_INVALID); + } else { + tl_assert(so->viW != VtsID_INVALID); + VtsID__rcdec(so->viR); + VtsID__rcdec(so->viW); + } + so->magic = 0; + HG_(free)( so ); +} + + +///////////////////////////////////////////////////////// +// // +// Top Level API // +// // +///////////////////////////////////////////////////////// + +static void show_thread_state ( HChar* str, Thr* t ) +{ + if (1) return; + if (t->viR == t->viW) { + VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR ); + VtsID__pp( t->viR ); + VG_(printf)("%s","\n"); + } else { + VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR ); + VtsID__pp( t->viR ); + VG_(printf)(" viW %u==", t->viW); + VtsID__pp( t->viW ); + VG_(printf)("%s","\n"); + } +} + + +Thr* libhb_init ( + void (*get_stacktrace)( Thr*, Addr*, UWord ), + ExeContext* (*get_EC)( Thr* ) + ) +{ + Thr* thr; + VtsID vi; + tl_assert(get_stacktrace); + tl_assert(get_EC); + main_get_stacktrace = get_stacktrace; + main_get_EC = get_EC; + + // No need to initialise hg_wordfm. + // No need to initialise hg_wordset. + + vts_set_init(); + vts_tab_init(); + event_map_init(); + VtsID__invalidate_caches(); + + // initialise shadow memory + zsm_init( SVal__rcinc, SVal__rcdec ); + + thr = Thr__new(); + vi = VtsID__mk_Singleton( thr, 1 ); + thr->viR = vi; + thr->viW = vi; + VtsID__rcinc(thr->viR); + VtsID__rcinc(thr->viW); + + show_thread_state(" root", thr); + return thr; +} + +Thr* libhb_create ( Thr* parent ) +{ + /* The child's VTSs are copies of the parent's VTSs, but ticked at + the child's index. Since the child's index is guaranteed + unique, it has never been seen before, so the implicit value + before the tick is zero and after that is one. 
*/ + Thr* child = Thr__new(); + + child->viR = VtsID__tick( parent->viR, child ); + child->viW = VtsID__tick( parent->viW, child ); + VtsID__rcinc(child->viR); + VtsID__rcinc(child->viW); + + tl_assert(VtsID__indexAt( child->viR, child ) == 1); + tl_assert(VtsID__indexAt( child->viW, child ) == 1); + + /* and the parent has to move along too */ + VtsID__rcdec(parent->viR); + VtsID__rcdec(parent->viW); + parent->viR = VtsID__tick( parent->viR, parent ); + parent->viW = VtsID__tick( parent->viW, parent ); + VtsID__rcinc(parent->viR); + VtsID__rcinc(parent->viW); + + show_thread_state(" child", child); + show_thread_state("parent", parent); + + return child; +} + +/* Shut down the library, and print stats (in fact that's _all_ + this is for. */ +void libhb_shutdown ( Bool show_stats ) +{ + if (show_stats) { + VG_(printf)("%s","<<< BEGIN libhb stats >>>\n"); + VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n", + stats__secmaps_allocd, + stats__secmap_ga_space_covered); + VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n", + stats__secmap_linesZ_allocd, + stats__secmap_linesZ_bytes); + VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n", + stats__secmap_linesF_allocd, + stats__secmap_linesF_bytes); + VG_(printf)(" secmaps: %'10lu iterator steppings\n", + stats__secmap_iterator_steppings); + VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n", + stats__secmaps_search, stats__secmaps_search_slow); + + VG_(printf)("%s","\n"); + VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n", + stats__cache_totrefs, stats__cache_totmisses ); + VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n", + stats__cache_Z_fetches, stats__cache_F_fetches ); + VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n", + stats__cache_Z_wbacks, stats__cache_F_wbacks ); + VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n", + stats__cache_invals, stats__cache_flushes ); + VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n", + stats__cache_make_New_arange, + stats__cache_make_New_inZrep); + + VG_(printf)("%s","\n"); + VG_(printf)(" cline: %'10lu normalises\n", + stats__cline_normalises ); + VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", + stats__cline_read64s, + stats__cline_read32s, + stats__cline_read16s, + stats__cline_read8s ); + VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", + stats__cline_write64s, + stats__cline_write32s, + stats__cline_write16s, + stats__cline_write8s ); + VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n", + stats__cline_set64s, + stats__cline_set32s, + stats__cline_set16s, + stats__cline_set8s ); + VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n", + stats__cline_get8s, stats__cline_copy8s ); + VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n", + stats__cline_64to32splits, + stats__cline_32to16splits, + stats__cline_16to8splits ); + VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n", + stats__cline_64to32pulldown, + stats__cline_32to16pulldown, + stats__cline_16to8pulldown ); + if (0) + VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n", + (Word)sizeof(LineZ), (Word)N_LINE_ARANGE); + + VG_(printf)("%s","\n"); + + VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n", + stats__msm_read, stats__msm_read_change); + VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n", + stats__msm_write, stats__msm_write_change); + VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n", + stats__getOrdering_queries, 
stats__getOrdering_misses); + VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n", + stats__join2_queries, stats__join2_misses); + + VG_(printf)("%s","\n"); + VG_(printf)( + " libhb: %ld entries in vts_table (approximately %lu bytes)\n", + VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE) + ); + VG_(printf)( " libhb: %lu entries in vts_set\n", + VG_(sizeFM)( vts_set ) ); + + VG_(printf)("%s","\n"); + VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n", + stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq, + stats__ctxt_rcdec2, + stats__ctxt_rcdec3 ); + VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n", + stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards); + VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n", + (UWord)N_RCEC_TAB, + stats__ctxt_tab_curr ); + VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n", + stats__ctxt_tab_qs, + stats__ctxt_tab_cmps ); +#if 0 + VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode)); + VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag)); + VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord)); + VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine)); + VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ)); + VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF)); + VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap)); + VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache)); + VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt)); + VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal)); + VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS)); + VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS)); + VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE)); + VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo)); + + VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray)); + VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM)); + VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr)); + VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO)); +#endif + + VG_(printf)("%s","<<< END libhb stats >>>\n"); + VG_(printf)("%s","\n"); + + } +} + +void libhb_async_exit ( Thr* thr ) +{ + /* is there anything we need to do? */ +} + +/* Both Segs and SOs point to VTSs. However, there is no sharing, so + a Seg that points at a VTS is its one-and-only owner, and ditto for + a SO that points at a VTS. */ + +SO* libhb_so_alloc ( void ) +{ + return SO__Alloc(); +} + +void libhb_so_dealloc ( SO* so ) +{ + tl_assert(so); + tl_assert(so->magic == SO_MAGIC); + SO__Dealloc(so); +} + +/* See comments in libhb.h for details on the meaning of + strong vs weak sends and strong vs weak receives. */ +void libhb_so_send ( Thr* thr, SO* so, Bool strong_send ) +{ + /* Copy the VTSs from 'thr' into the sync object, and then move + the thread along one step. */ + + tl_assert(so); + tl_assert(so->magic == SO_MAGIC); + + /* stay sane .. a thread's read-clock must always lead or be the + same as its write-clock */ + { POrd ord = VtsID__getOrdering(thr->viW, thr->viR); + tl_assert(ord == POrd_EQ || ord == POrd_LT); + } + + /* since we're overwriting the VtsIDs in the SO, we need to drop + any references made by the previous contents thereof */ + if (so->viR == VtsID_INVALID) { + tl_assert(so->viW == VtsID_INVALID); + so->viR = thr->viR; + so->viW = thr->viW; + VtsID__rcinc(so->viR); + VtsID__rcinc(so->viW); + } else { + /* In a strong send, we dump any previous VC in the SO and + install the sending thread's VC instead. 
For a weak send we + must join2 with what's already there. */ + tl_assert(so->viW != VtsID_INVALID); + VtsID__rcdec(so->viR); + VtsID__rcdec(so->viW); + so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR ); + so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW ); + VtsID__rcinc(so->viR); + VtsID__rcinc(so->viW); + } + + /* move both parent clocks along */ + VtsID__rcdec(thr->viR); + VtsID__rcdec(thr->viW); + thr->viR = VtsID__tick( thr->viR, thr ); + thr->viW = VtsID__tick( thr->viW, thr ); + VtsID__rcinc(thr->viR); + VtsID__rcinc(thr->viW); + if (strong_send) + show_thread_state("s-send", thr); + else + show_thread_state("w-send", thr); +} + +void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv ) +{ + tl_assert(so); + tl_assert(so->magic == SO_MAGIC); + + if (so->viR != VtsID_INVALID) { + tl_assert(so->viW != VtsID_INVALID); + + /* Weak receive (basically, an R-acquisition of a R-W lock). + This advances the read-clock of the receiver, but not the + write-clock. */ + VtsID__rcdec(thr->viR); + thr->viR = VtsID__join2( thr->viR, so->viR ); + VtsID__rcinc(thr->viR); + + /* For a strong receive, we also advance the receiver's write + clock, which means the receive as a whole is essentially + equivalent to a W-acquisition of a R-W lock. */ + if (strong_recv) { + VtsID__rcdec(thr->viW); + thr->viW = VtsID__join2( thr->viW, so->viW ); + VtsID__rcinc(thr->viW); + } + + if (strong_recv) + show_thread_state("s-recv", thr); + else + show_thread_state("w-recv", thr); + + } else { + tl_assert(so->viW == VtsID_INVALID); + /* Deal with degenerate case: 'so' has no vts, so there has been + no message posted to it. Just ignore this case. */ + show_thread_state("d-recv", thr); + } +} + +Bool libhb_so_everSent ( SO* so ) +{ + if (so->viR == VtsID_INVALID) { + tl_assert(so->viW == VtsID_INVALID); + return False; + } else { + tl_assert(so->viW != VtsID_INVALID); + return True; + } +} + +#define XXX1 0 // 0x67a106c +#define XXX2 0 + +static Bool TRACEME(Addr a, SizeT szB) { + if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True; + if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True; + return False; +} +static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) { + SVal sv = zsm_read8(a); + VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv); + show_thread_state("", thr); + VG_(printf)("%s","\n"); +} + +void libhb_range_new ( Thr* thr, Addr a, SizeT szB ) +{ + SVal sv = SVal__mkC(thr->viW, thr->viW); + tl_assert(is_sane_SVal_C(sv)); + if(TRACEME(a,szB))trace(thr,a,szB,"nw-before"); + zsm_set_range( a, szB, sv ); + if(TRACEME(a,szB))trace(thr,a,szB,"nw-after "); +} + +void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB ) +{ + if(TRACEME(a,szB))trace(thr,a,szB,"NA-before"); + zsm_set_range( a, szB, SVal__mkA() ); + if(TRACEME(a,szB))trace(thr,a,szB,"NA-after "); +} + +void* libhb_get_Thr_opaque ( Thr* thr ) { + tl_assert(thr); + return thr->opaque; +} + +void libhb_set_Thr_opaque ( Thr* thr, void* v ) { + tl_assert(thr); + thr->opaque = v; +} + +void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len ) +{ + zsm_copy_range(dst, src, len); +} + +void libhb_maybe_GC ( void ) +{ + event_map_maybe_GC(); + /* If there are still freelist entries available, no need for a + GC. */ + if (vts_tab_freelist != VtsID_INVALID) + return; + /* So all the table entries are full, and we're having to expand + the table. But did we hit the threshhold point yet? 
*/ +   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at) +      return; +   vts_tab__do_GC( False/*don't show stats*/ ); +} + + +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// +//                                                             // +// SECTION END main library                                    // +//                                                             // +///////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////// + +/*--------------------------------------------------------------------*/ +/*--- end libhb_main.c ---*/ +/*--------------------------------------------------------------------*/
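
zsm_set_range_SMALL in the hunk above works an arbitrary byte range up to 8-byte alignment with 1-, 2- and 4-byte shadow writes, cruises along with 8-byte writes, and then steps back down for the tail. The following is a minimal standalone sketch of that access plan, not part of the patch: small_range_plan and its printf output are illustrative stand-ins, where the real routine issues zsm_write8/16/32/64 calls on shadow memory.

    #include <stdio.h>

    typedef unsigned long Addr;
    typedef unsigned long SizeT;

    static int aligned ( Addr a, SizeT szB ) { return (a & (szB - 1)) == 0; }

    /* Prints the sequence of shadow writes zsm_set_range_SMALL would issue
       for the general (non fast-tracked) case. */
    void small_range_plan ( Addr a, SizeT len )
    {
       /* work up to 8-byte alignment ... */
       if (len >= 1 && !aligned(a, 2)) { printf("write8  %#lx\n", a); a += 1; len -= 1; }
       if (len >= 2 && !aligned(a, 4)) { printf("write16 %#lx\n", a); a += 2; len -= 2; }
       if (len >= 4 && !aligned(a, 8)) { printf("write32 %#lx\n", a); a += 4; len -= 4; }
       /* ... cruise along at 8 bytes per write ... */
       while (len >= 8)                { printf("write64 %#lx\n", a); a += 8; len -= 8; }
       /* ... then step back down for the unaligned tail */
       if (len >= 4)                   { printf("write32 %#lx\n", a); a += 4; len -= 4; }
       if (len >= 2)                   { printf("write16 %#lx\n", a); a += 2; len -= 2; }
       if (len >= 1)                   { printf("write8  %#lx\n", a); a += 1; len -= 1; }
    }

    int main ( void ) { small_range_plan(0x1003, 27); return 0; }

For the example range the plan is one byte, one 32-bit word, two 64-bit words, one 32-bit word and one 16-bit word, totalling the requested 27 bytes.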
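For ranges of at least two shadow lines, zsm_set_range instead splits the request into an unaligned prefix, a run of whole lines that are rewritten directly in their out-of-cache Z representation, and an unaligned suffix. The following standalone sketch shows just that splitting arithmetic; it assumes a 64-byte N_LINE_ARANGE, and line_roundup/line_rounddn are illustrative stand-ins for cacheline_ROUNDUP/cacheline_ROUNDDN.

    #include <assert.h>
    #include <stdio.h>

    #define N_LINE_ARANGE 64UL   /* assumed line coverage; the real value lives in libhb_core.c */

    typedef unsigned long Addr;
    typedef unsigned long UWord;
    typedef unsigned long SizeT;

    static Addr line_roundup ( Addr a ) { return (a + N_LINE_ARANGE - 1) & ~(N_LINE_ARANGE - 1); }
    static Addr line_rounddn ( Addr a ) { return a & ~(N_LINE_ARANGE - 1); }

    /* Mirrors the split done by zsm_set_range when len >= 2 * N_LINE_ARANGE. */
    void split_range ( Addr a, SizeT len )
    {
       Addr  before_start, aligned_start, after_start;
       UWord before_len, aligned_len, after_len;
       assert(len >= 2 * N_LINE_ARANGE);            /* the guard taken in zsm_set_range */
       before_start  = a;
       aligned_start = line_roundup(a);
       after_start   = line_rounddn(a + len);
       before_len    = aligned_start - before_start;
       aligned_len   = after_start  - aligned_start;
       after_len     = a + len - after_start;
       assert(before_len < N_LINE_ARANGE && after_len < N_LINE_ARANGE);
       printf("prefix [%#lx,+%lu): set byte/word-wise (zsm_set_range_SMALL)\n",
              before_start, before_len);
       printf("middle [%#lx,+%lu): set a whole line at a time, in the Z representation\n",
              aligned_start, aligned_len);
       printf("suffix [%#lx,+%lu): set byte/word-wise (zsm_set_range_SMALL)\n",
              after_start, after_len);
    }

    int main ( void )
    {
       split_range(0x1003, 300);   /* an unaligned 300-byte range */
       return 0;
    }

The middle portion never forces a line into the write-back cache: if its tag is not already resident, the Z line is rewritten in place with a one-entry dictionary holding svNew, which is what makes large make-New operations cheap.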
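The thread-lifetime and message-passing entry points in this file (libhb_create, libhb_so_send, libhb_so_recv) are all small moves on vector clocks: the child is created with copies of the parent's clocks ticked at the child's fresh index, a strong send overwrites the sync object's clocks while a weak send joins into them, and a receive joins the receiver's read-clock, plus its write-clock when the receive is strong. Below is a toy model of those moves with VTSs as fixed-size counter arrays rather than interned VtsIDs; VC, vc_tick, vc_join and the ThrSk/SOSk types are illustrative only, not the library's API.

    #define NTHREADS 4

    typedef struct { unsigned long ts[NTHREADS]; } VC;     /* toy stand-in for a VTS */

    void vc_tick ( VC* vc, int thr ) { vc->ts[thr]++; }
    void vc_join ( VC* dst, const VC* src )                 /* pointwise max, i.e. join2 */
    {
       for (int i = 0; i < NTHREADS; i++)
          if (src->ts[i] > dst->ts[i]) dst->ts[i] = src->ts[i];
    }

    typedef struct { int ix; VC viR, viW; } ThrSk;          /* a thread's read- and write-clocks */
    typedef struct { int valid; VC viR, viW; } SOSk;        /* sync object; valid==0 <=> never sent to */

    /* libhb_create: the child gets the parent's clocks ticked at the child's
       (never seen before) index, so its first timestamp is 1; the parent
       then has to move along too. */
    void thr_create ( ThrSk* parent, ThrSk* child, int child_ix )
    {
       child->ix  = child_ix;
       child->viR = parent->viR;  vc_tick(&child->viR, child_ix);
       child->viW = parent->viW;  vc_tick(&child->viW, child_ix);
       vc_tick(&parent->viR, parent->ix);
       vc_tick(&parent->viW, parent->ix);
    }

    /* libhb_so_send: a strong send overwrites the SO's clocks, a weak send
       joins into them; either way the sender then ticks its own clocks. */
    void so_send ( ThrSk* t, SOSk* so, int strong )
    {
       if (!so->valid || strong) { so->viR = t->viR; so->viW = t->viW; }
       else                      { vc_join(&so->viR, &t->viR); vc_join(&so->viW, &t->viW); }
       so->valid = 1;
       vc_tick(&t->viR, t->ix);
       vc_tick(&t->viW, t->ix);
    }

    /* libhb_so_recv: a weak receive joins only the read-clock (an
       R-acquisition of an R-W lock); a strong receive joins the write-clock
       as well (a W-acquisition). */
    void so_recv ( ThrSk* t, SOSk* so, int strong )
    {
       if (!so->valid) return;                              /* nothing was ever sent: ignore */
       vc_join(&t->viR, &so->viR);
       if (strong) vc_join(&t->viW, &so->viW);
    }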