summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephan Bergmann <sbergman@redhat.com>2014-02-18 12:49:50 +0100
committerStephan Bergmann <sbergman@redhat.com>2014-02-18 13:08:58 +0100
commit042725a5dadc9f2c6368ca451b6d20046129b8af (patch)
treec43b1359bcef882985a06303551a227825d1e5a8
parentf2529fc7441ced32daef09c0ca767080998d1792 (diff)
Stick to a single O[U]String hash function
8f8bc0dcf3bc253ae49159d52db049767f476ced "Move string hash function into String class" had introduced a new getHash64 that, besides returning sal_uInt64 instead of just sal_Int32, didn't do sampling of only a handful of characters, but always computed the hash over all characters (as the usage in SfxItemSet and SdPage appears to require for either performance or approximated correctness). However, it would be advantageous to keep the stable URE interface as small as possible. Now, O(1) sampling was apparently considered state of the art when the rtl string classes were first created, closely copying java.lang.String, which at that time demanded sampling for hashCode(), too---but never sampling more than 15 characters, with the obvious (in hindsight, at least) performance catastrophes, so they changed it to O(n) somewhere along the way. Based on that, this commit changes the existing hash functions to not do sampling any more, and removes the newly introduced -64 variants again. (Where the extended value range of sal_uInt64 compared to sal_Int32 was hopefully not vital to the existing uses.) The old implementation used sampling only for strings of length >= 256, so I did a "make check" build with an instrumented hash function that flagged all uses with inputs of length >= 256, and grepped workdir/{Cppunit,Junit,Python}Test for hits. Of the 2849 hits encountered, 2845 where in the range from 256 to 295 characters, and only the remaining four where of 2472 characters. Those four were from CppunitTest_sc_subsequent_filters_test, importing long text into a cell, causing ScDocumentImport::setStringCell to call svl::SharedStringPool::intern, which internally uses an unordered_set. These results appear to justify the change. Change-Id: I78fcc3b0f07389bdf36a21701b95a1ff0a0d970f
-rw-r--r--include/rtl/string.h18
-rw-r--r--include/rtl/string.hxx15
-rw-r--r--include/rtl/ustring.h18
-rw-r--r--include/rtl/ustring.hxx15
-rw-r--r--include/svl/itemset.hxx2
-rw-r--r--sal/rtl/strtmpl.cxx57
-rw-r--r--sal/util/sal.map6
-rw-r--r--sd/inc/sdpage.hxx2
-rw-r--r--sd/source/core/sdpage2.cxx4
-rw-r--r--sd/source/core/stlpool.cxx2
-rw-r--r--svl/source/items/itemset.cxx4
11 files changed, 10 insertions, 133 deletions
diff --git a/include/rtl/string.h b/include/rtl/string.h
index 71e095561a37..32344bfa4413 100644
--- a/include/rtl/string.h
+++ b/include/rtl/string.h
@@ -277,24 +277,6 @@ SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_str_hashCode(
SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_str_hashCode_WithLength(
const sal_Char * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
-/** Return a hash code (64bit) for a string.
-
- It is not allowed to store the hash code persistently, because later
- versions could return other hash codes.
-
- @param str
- a string. Need not be null-terminated, but must be at least as long as
- the specified len.
-
- @param len
- the length of the string.
-
- @return
- a hash code for the given string.
- */
-SAL_DLLPUBLIC sal_uInt64 SAL_CALL rtl_str_hashCode64_WithLength(
- const sal_Char * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
-
/** Search for the first occurrence of a character within a string.
The string must be null-terminated.
diff --git a/include/rtl/string.hxx b/include/rtl/string.hxx
index 5eb4fcf6ee1e..fb7283b3a801 100644
--- a/include/rtl/string.hxx
+++ b/include/rtl/string.hxx
@@ -892,21 +892,6 @@ public:
}
/**
- Returns a 64bit hash of the string data.
- This hashes the entire data, while hashCode would do sampling for larger string sizes.
-
- @return a hash code value of the string data
-
- @see hashCode() for simple hashes
-
- @since LibreOffice 4.3
- */
- sal_uInt64 hashCode64() const SAL_THROW(())
- {
- return rtl_str_hashCode64_WithLength( pData->buffer, pData->length );
- }
-
- /**
Returns a hashcode for this string.
@return a hash code value for this object.
diff --git a/include/rtl/ustring.h b/include/rtl/ustring.h
index 206989934628..80c6bccf7f3d 100644
--- a/include/rtl/ustring.h
+++ b/include/rtl/ustring.h
@@ -551,24 +551,6 @@ SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_ustr_hashCode(
SAL_DLLPUBLIC sal_Int32 SAL_CALL rtl_ustr_hashCode_WithLength(
const sal_Unicode * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
-/** Return a hash code (64bit) for a string.
-
- It is not allowed to store the hash code persistently, because later
- versions could return other hash codes.
-
- @param str
- a string. Need not be null-terminated, but must be at least as long as
- the specified len.
-
- @param len
- the length of the string.
-
- @return
- a hash code for the given string.
- */
-SAL_DLLPUBLIC sal_uInt64 SAL_CALL rtl_ustr_hashCode64_WithLength(
- const sal_Unicode * str, sal_Int32 len ) SAL_THROW_EXTERN_C();
-
/** Search for the first occurrence of a character within a string.
The string must be null-terminated.
diff --git a/include/rtl/ustring.hxx b/include/rtl/ustring.hxx
index 9f9a9565f18f..f1a5f4aeb296 100644
--- a/include/rtl/ustring.hxx
+++ b/include/rtl/ustring.hxx
@@ -1227,21 +1227,6 @@ public:
}
/**
- Returns a 64bit hash of the string data.
- This hashes the entire data, while hashCode would do sampling for larger string sizes.
-
- @return a hash code value of the string data
-
- @see hashCode() for simple hashes
-
- @since LibreOffice 4.3
- */
- sal_uInt64 hashCode64() const SAL_THROW(())
- {
- return rtl_ustr_hashCode64_WithLength( pData->buffer, pData->length );
- }
-
- /**
Returns a hashcode for this string.
@return a hash code value for this object.
diff --git a/include/svl/itemset.hxx b/include/svl/itemset.hxx
index 228428158fc4..b1a00c76e1eb 100644
--- a/include/svl/itemset.hxx
+++ b/include/svl/itemset.hxx
@@ -144,7 +144,7 @@ public:
virtual SvStream & Store( SvStream &, bool bDirect = false ) const;
bool operator==(const SfxItemSet &) const;
- virtual sal_uInt64 getHash() const;
+ sal_Int32 getHash() const;
virtual OString stringify() const;
};
diff --git a/sal/rtl/strtmpl.cxx b/sal/rtl/strtmpl.cxx
index a9eb665e9216..957609692449 100644
--- a/sal/rtl/strtmpl.cxx
+++ b/sal/rtl/strtmpl.cxx
@@ -252,68 +252,17 @@ sal_Int32 SAL_CALL IMPL_RTL_STRNAME( hashCode )( const IMPL_RTL_STRCODE* pStr )
/* ----------------------------------------------------------------------- */
-sal_uInt64 SAL_CALL IMPL_RTL_STRNAME( hashCode64_WithLength )( const IMPL_RTL_STRCODE* pStr,
- sal_Int32 nLen )
- SAL_THROW_EXTERN_C()
-{
- sal_uInt64 nHash = 0;
-
- for( sal_Int32 i = 0; i < nLen; i++ )
- nHash = (nHash << 5) - nHash + *pStr++;
- return nHash;
-}
-
-/* ----------------------------------------------------------------------- */
-
sal_Int32 SAL_CALL IMPL_RTL_STRNAME( hashCode_WithLength )( const IMPL_RTL_STRCODE* pStr,
sal_Int32 nLen )
SAL_THROW_EXTERN_C()
{
sal_uInt32 h = static_cast<sal_uInt32>(nLen);
-
- if ( nLen < 256 )
- {
- while ( nLen > 0 )
- {
- h = (h*37U) + IMPL_RTL_USTRCODE( *pStr );
- pStr++;
- nLen--;
- }
- }
- else
+ while ( nLen > 0 )
{
- sal_Int32 nSkip;
- const IMPL_RTL_STRCODE* pEndStr = pStr+nLen-5;
-
- /* only sample some characters */
- /* the first 3, some characters between, and the last 5 */
- h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
- pStr++;
- h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
- pStr++;
- h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
+ h = (h*37U) + IMPL_RTL_USTRCODE( *pStr );
pStr++;
-
- nSkip = nLen / 8;
- nLen -= 8;
- while ( nLen > 0 )
- {
- h = (h*39U) + IMPL_RTL_USTRCODE( *pStr );
- pStr += nSkip;
- nLen -= nSkip;
- }
-
- h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
- pEndStr++;
- h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
- pEndStr++;
- h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
- pEndStr++;
- h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
- pEndStr++;
- h = (h*39U) + IMPL_RTL_USTRCODE( *pEndStr );
+ nLen--;
}
-
return static_cast<sal_Int32>(h);
}
diff --git a/sal/util/sal.map b/sal/util/sal.map
index 074eb7b96063..1456d6db1cf8 100644
--- a/sal/util/sal.map
+++ b/sal/util/sal.map
@@ -670,12 +670,6 @@ LIBO_UDK_4.2 { # symbols available in >= LibO 4.2
rtl_ustr_toUInt32;
} LIBO_UDK_4.1;
-LIBO_UDK_4.3 { #symbols available in >= LibO 4.3
- global:
- rtl_str_hashCode64_WithLength;
- rtl_ustr_hashCode64_WithLength;
-} LIBO_UDK_4.2;
-
PRIVATE_1.0 {
global:
osl_detail_ObjectRegistry_storeAddresses;
diff --git a/sd/inc/sdpage.hxx b/sd/inc/sdpage.hxx
index c08b48f71bd1..40a4dcf15358 100644
--- a/sd/inc/sdpage.hxx
+++ b/sd/inc/sdpage.hxx
@@ -378,7 +378,7 @@ public:
void removeAnnotation( const ::com::sun::star::uno::Reference< ::com::sun::star::office::XAnnotation >& xAnnotation );
const sd::AnnotationVector& getAnnotations() const { return maAnnotations; }
bool hasAnnotations() const { return !maAnnotations.empty(); }
- sal_uInt64 getHash() const;
+ sal_Int32 getHash() const;
virtual OString stringify() const;
diff --git a/sd/source/core/sdpage2.cxx b/sd/source/core/sdpage2.cxx
index a870895dd32c..3f5615056b0b 100644
--- a/sd/source/core/sdpage2.cxx
+++ b/sd/source/core/sdpage2.cxx
@@ -603,9 +603,9 @@ OString SdPage::stringify() const
return aString.makeStringAndClear();
}
-sal_uInt64 SdPage::getHash() const
+sal_Int32 SdPage::getHash() const
{
- return stringify().hashCode64();
+ return stringify().hashCode();
}
diff --git a/sd/source/core/stlpool.cxx b/sd/source/core/stlpool.cxx
index b1bad09de780..387fb8f2f5a5 100644
--- a/sd/source/core/stlpool.cxx
+++ b/sd/source/core/stlpool.cxx
@@ -650,7 +650,7 @@ void SdStyleSheetPool::CopySheets(SdStyleSheetPool& rSourcePool, SfxStyleFamily
SfxStyleSheetBase* pExistingSheet = Find(aName, eFamily);
if( pExistingSheet && !rRenameSuffix.isEmpty() )
{
- sal_uInt64 nHash = xSheet->GetItemSet().getHash();
+ sal_Int32 nHash = xSheet->GetItemSet().getHash();
if( pExistingSheet->GetItemSet().getHash() != nHash )
{
OUString aTmpName = aName + rRenameSuffix;
diff --git a/svl/source/items/itemset.cxx b/svl/source/items/itemset.cxx
index 0a4e301e6309..d68655f0414f 100644
--- a/svl/source/items/itemset.cxx
+++ b/svl/source/items/itemset.cxx
@@ -2032,9 +2032,9 @@ SfxItemSet *SfxAllItemSet::Clone(sal_Bool bItems, SfxItemPool *pToPool ) const
// -----------------------------------------------------------------------
-sal_uInt64 SfxItemSet::getHash() const
+sal_Int32 SfxItemSet::getHash() const
{
- return stringify().hashCode64();
+ return stringify().hashCode();
}
// -----------------------------------------------------------------------