summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Meeks <michael.meeks@novell.com>2010-09-17 13:13:04 +0200
committerCédric Bosdonnat <cedricbosdo@openoffice.org>2010-09-17 13:16:27 +0200
commitd02318df60b8ab8abe0006bacc5ea15b896be553 (patch)
treeb0ae6659b770ec601556babff7749acc6d60da30
parent2d2ef785745a258659ccb3c262265ae2cd1990a9 (diff)
sal-strintern-speed.diff: sal-strintern - speedup
i#78496
-rw-r--r--sal/rtl/source/hash.cxx202
1 files changed, 165 insertions, 37 deletions
diff --git a/sal/rtl/source/hash.cxx b/sal/rtl/source/hash.cxx
index 7caa2341ca11..86454545ccd5 100644
--- a/sal/rtl/source/hash.cxx
+++ b/sal/rtl/source/hash.cxx
@@ -27,67 +27,164 @@
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_sal.hxx"
-#include "rtl/allocator.hxx"
#include "hash.h"
#include "strimp.h"
+#include <osl/diagnose.h>
+struct StringHashTableImpl {
+ sal_uInt32 nEntries;
+ sal_uInt32 nSize;
+ rtl_uString **pData;
+};
+
+typedef StringHashTableImpl StringHashTable;
-#include <hash_set>
+// Only for use in the implementation
+static StringHashTable *rtl_str_hash_new (sal_uInt32 nSize);
+static void rtl_str_hash_free (StringHashTable *pHash);
-namespace {
+}
-struct UStringHash
+StringHashTable *
+getHashTable ()
{
- size_t operator()(rtl_uString * const &rString) const
- { return (size_t)rtl_ustr_hashCode_WithLength( rString->buffer, rString->length ); }
-};
+ static StringHashTable *pInternPool = NULL;
+ if (pInternPool == NULL) {
+ static StringHashTable* pHash = rtl_str_hash_new(1024);
+ pInternPool = pHash;
+ }
+ return pInternPool;
+}
+
+// Better / smaller / faster hash set ....
+
+// TODO: add bottom bit-set list terminator to string list
-struct UStringEqual
+static sal_uInt32
+getNextSize (sal_uInt32 nSize)
{
- sal_Bool operator() ( rtl_uString * const &pStringA,
- rtl_uString * const &pStringB) const
+ // Sedgewick - Algorithms in C P577.
+ static const sal_uInt32 nPrimes[] = { 1021, 2039, 4093, 8191, 16381, 32749,
+ 65521, 131071,262139, 524287, 1048573,
+ 2097143, 4194301, 8388593, 16777213,
+ 33554393, 67108859, 134217689 };
+ #define NUM_PRIMES (sizeof (nPrimes)/ sizeof (nPrimes[0]))
+ for (sal_uInt32 i = 0; i < NUM_PRIMES; i++)
{
- if (pStringA == pStringB)
- return true;
- if (pStringA->length != pStringB->length)
- return false;
- return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length,
- pStringB->buffer, pStringB->length);
+ if (nPrimes[i] > nSize)
+ return nPrimes[i];
}
-};
+ return nSize * 2;
+}
-typedef std::hash_set< rtl_uString *, UStringHash, UStringEqual,
- rtl::Allocator<rtl_uString *> > StringHashTable;
+static sal_uInt32
+hashString (rtl_uString *pString)
+{
+ return (sal_uInt32) rtl_ustr_hashCode_WithLength (pString->buffer,
+ pString->length);
+}
-StringHashTable *
-getHashTable ()
+static StringHashTable *
+rtl_str_hash_new (sal_uInt32 nSize)
{
- static StringHashTable *pInternPool = NULL;
- if (pInternPool == NULL) {
- static StringHashTable aImpl(1024);
- pInternPool = &aImpl;
+ StringHashTable *pHash = (StringHashTable *)malloc (sizeof (StringHashTable));
+
+ pHash->nEntries = 0;
+ pHash->nSize = getNextSize (nSize);
+ pHash->pData = (rtl_uString **) calloc (sizeof (rtl_uString *), pHash->nSize);
+
+ return pHash;
+}
+
+static void
+rtl_str_hash_free (StringHashTable *pHash)
+{
+ if (!pHash)
+ return;
+ if (pHash->pData)
+ free (pHash->pData);
+ free (pHash);
+}
+
+static void
+rtl_str_hash_insert_nonequal (StringHashTable *pHash,
+ rtl_uString *pString)
+{
+ sal_uInt32 nHash = hashString (pString);
+ sal_uInt32 n;
+ rtl_uString *pHashStr;
+
+ n = nHash % pHash->nSize;
+ while ((pHashStr = pHash->pData[n]) != NULL) {
+ n++;
+ if (n >= pHash->nSize)
+ n = 0;
}
- return pInternPool;
+ pHash->pData[n] = pString;
+}
+
+static void
+rtl_str_hash_resize (sal_uInt32 nNewSize)
+{
+ sal_uInt32 i;
+ StringHashTable *pNewHash;
+ StringHashTable *pHash = getHashTable();
+
+ OSL_ASSERT (nNewSize > pHash->nEntries);
+
+ pNewHash = rtl_str_hash_new (nNewSize);
+
+ for (i = 0; i < pHash->nSize; i++)
+ {
+ if (pHash->pData[i] != NULL)
+ rtl_str_hash_insert_nonequal (pNewHash, pHash->pData[i]);
+ }
+ pNewHash->nEntries = pHash->nEntries;
+ free (pHash->pData);
+ *pHash = *pNewHash;
+ pNewHash->pData = NULL;
+ rtl_str_hash_free (pNewHash);
}
+static int
+compareEqual (rtl_uString *pStringA, rtl_uString *pStringB)
+{
+ if (pStringA == pStringB)
+ return 1;
+ if (pStringA->length != pStringB->length)
+ return 0;
+ return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length,
+ pStringB->buffer, pStringB->length);
}
-extern "C" {
rtl_uString *
rtl_str_hash_intern (rtl_uString *pString,
int can_return)
{
+ sal_uInt32 nHash = hashString (pString);
+ sal_uInt32 n;
+ rtl_uString *pHashStr;
+
StringHashTable *pHash = getHashTable();
- StringHashTable::iterator aIter;
- aIter = pHash->find(pString);
- if (aIter != pHash->end())
- {
- rtl_uString *pHashStr = *aIter;
- rtl_uString_acquire (pHashStr);
- return pHashStr;
+
+ // Should we resize ?
+ if (pHash->nEntries >= pHash->nSize/2)
+ rtl_str_hash_resize (getNextSize(pHash->nSize));
+
+ n = nHash % pHash->nSize;
+ while ((pHashStr = pHash->pData[n]) != NULL) {
+ if (compareEqual (pHashStr, pString))
+ {
+ rtl_uString_acquire (pHashStr);
+ return pHashStr;
+ }
+ n++;
+ if (n >= pHash->nSize)
+ n = 0;
}
+
if (!can_return)
{
rtl_uString *pCopy = NULL;
@@ -99,7 +196,8 @@ rtl_str_hash_intern (rtl_uString *pString,
if (!SAL_STRING_IS_STATIC (pString))
pString->refCount |= SAL_STRING_INTERN_FLAG;
- pHash->insert(pString);
+ pHash->pData[n] = pString;
+ pHash->nEntries++;
return pString;
}
@@ -107,7 +205,37 @@ rtl_str_hash_intern (rtl_uString *pString,
void
rtl_str_hash_remove (rtl_uString *pString)
{
- getHashTable()->erase(pString);
-}
+ sal_uInt32 n;
+ sal_uInt32 nHash = hashString (pString);
+ rtl_uString *pHashStr;
+
+ StringHashTable *pHash = getHashTable();
+ n = nHash % pHash->nSize;
+ while ((pHashStr = pHash->pData[n]) != NULL) {
+ if (compareEqual (pHashStr, pString))
+ break;
+ n++;
+ if (n >= pHash->nSize)
+ n = 0;
+ }
+ OSL_ASSERT (pHash->pData[n] != 0);
+ if (pHash->pData[n] == NULL)
+ return;
+
+ pHash->pData[n++] = NULL;
+ pHash->nEntries--;
+
+ if (n >= pHash->nSize)
+ n = 0;
+
+ while ((pHashStr = pHash->pData[n]) != NULL) {
+ pHash->pData[n] = NULL;
+ // FIXME: rather unsophisticated and N^2 in chain-length, but robust.
+ rtl_str_hash_insert_nonequal (pHash, pHashStr);
+ n++;
+ if (n >= pHash->nSize)
+ n = 0;
+ }
+ // FIXME: Should we down-size ?
}