diff options
Diffstat (limited to 'sal/rtl/source/ustring.cxx')
-rw-r--r-- | sal/rtl/source/ustring.cxx | 997 |
1 files changed, 997 insertions, 0 deletions
diff --git a/sal/rtl/source/ustring.cxx b/sal/rtl/source/ustring.cxx new file mode 100644 index 000000000000..a37353c99161 --- /dev/null +++ b/sal/rtl/source/ustring.cxx @@ -0,0 +1,997 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +#pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance +#endif + +#include <rtl/memory.h> +#include <osl/diagnose.h> +#include <osl/interlck.h> +#include <rtl/alloc.h> +#include <osl/mutex.h> +#include <osl/doublecheckedlocking.h> +#include <rtl/tencinfo.h> + +#include <string.h> +#include <sal/alloca.h> + +#include "hash.hxx" +#include "strimp.hxx" +#include "surrogates.hxx" +#include <rtl/ustring.h> + +#include "rtl/math.h" +#include "rtl/tencinfo.h" + +/* ======================================================================= */ + +/* static data to be referenced by all empty strings + * the refCount is predefined to 1 and must never become 0 ! + */ +static rtl_uString const aImplEmpty_rtl_uString = +{ + (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */ + 0, /*sal_Int32 length; */ + { 0 } /*sal_Unicode buffer[1];*/ +}; + +/* ======================================================================= */ + +#define IMPL_RTL_STRCODE sal_Unicode +#define IMPL_RTL_USTRCODE( c ) (c) +#define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n + +#define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n +#define IMPL_RTL_STRINGDATA rtl_uString +#define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString +#define IMPL_RTL_INTERN +static void internRelease (rtl_uString *pThis); + +/* ======================================================================= */ + +/* Include String/UString template code */ + +#include "strtmpl.cxx" + +sal_Int32 rtl_ustr_indexOfAscii_WithLength( + sal_Unicode const * str, sal_Int32 len, + char const * subStr, sal_Int32 subLen) SAL_THROW_EXTERN_C() +{ + if (subLen > 0 && subLen <= len) { + sal_Int32 i; + for (i = 0; i <= len - subLen; ++i) { + if (rtl_ustr_asciil_reverseEquals_WithLength( + str + i, subStr, subLen)) + { + return i; + } + } + } + return -1; +} + +sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength( + sal_Unicode const * str, sal_Int32 len, + char const * subStr, sal_Int32 subLen) SAL_THROW_EXTERN_C() +{ + if (subLen > 0 && subLen <= len) { + sal_Int32 i; + for (i = len - subLen; i >= 0; --i) { + if (rtl_ustr_asciil_reverseEquals_WithLength( + str + i, subStr, subLen)) + { + return i; + } + } + } + return -1; +} + +sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f) + SAL_THROW_EXTERN_C() +{ + rtl_uString * pResult = NULL; + sal_Int32 nLen; + rtl_math_doubleToUString( + &pResult, 0, 0, f, rtl_math_StringFormat_G, + RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, + 0, sal_True); + nLen = pResult->length; + OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT); + rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); + rtl_uString_release(pResult); + return nLen; +} + +sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d) + SAL_THROW_EXTERN_C() +{ + rtl_uString * pResult = NULL; + sal_Int32 nLen; + rtl_math_doubleToUString( + &pResult, 0, 0, d, rtl_math_StringFormat_G, + RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, + 0, sal_True); + nLen = pResult->length; + OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE); + rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); + rtl_uString_release(pResult); + return nLen; +} + +float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr) SAL_THROW_EXTERN_C() +{ + return (float) rtl_math_uStringToDouble(pStr, + pStr + rtl_ustr_getLength(pStr), + '.', 0, 0, 0); +} + +double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr) SAL_THROW_EXTERN_C() +{ + return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.', + 0, 0, 0); +} + +/* ======================================================================= */ + +sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1, + const sal_Char* pStr2 ) + SAL_THROW_EXTERN_C() +{ + sal_Int32 nRet; + while ( ((nRet = ((sal_Int32)(*pStr1))- + ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && + *pStr2 ) + { + pStr1++; + pStr2++; + } + + return nRet; +} + +/* ----------------------------------------------------------------------- */ + +sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1, + sal_Int32 nStr1Len, + const sal_Char* pStr2 ) + SAL_THROW_EXTERN_C() +{ + sal_Int32 nRet = 0; + while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)- + ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && + nStr1Len && *pStr2 ) + { + pStr1++; + pStr2++; + nStr1Len--; + } + + return nRet; +} + +/* ----------------------------------------------------------------------- */ + +sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1, + sal_Int32 nStr1Len, + const sal_Char* pStr2, + sal_Int32 nShortenedLength ) + SAL_THROW_EXTERN_C() +{ + const sal_Unicode* pStr1End = pStr1 + nStr1Len; + sal_Int32 nRet; + while ( (nShortenedLength > 0) && + (pStr1 < pStr1End) && *pStr2 ) + { + /* Check ASCII range */ + OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); + + nRet = ((sal_Int32)*pStr1)- + ((sal_Int32)(unsigned char)*pStr2); + if ( nRet != 0 ) + return nRet; + + nShortenedLength--; + pStr1++; + pStr2++; + } + + if ( nShortenedLength <= 0 ) + return 0; + + if ( *pStr2 ) + { + OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); + // first is a substring of the second string => less (negative value) + nRet = -1; + } + else + { + // greater or equal + nRet = pStr1End - pStr1; + } + + return nRet; +} + +/* ----------------------------------------------------------------------- */ + +sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1, + sal_Int32 nStr1Len, + const sal_Char* pStr2, + sal_Int32 nStr2Len ) + SAL_THROW_EXTERN_C() +{ + const sal_Unicode* pStr1Run = pStr1+nStr1Len; + const sal_Char* pStr2Run = pStr2+nStr2Len; + sal_Int32 nRet; + while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) ) + { + pStr1Run--; + pStr2Run--; + nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run); + if ( nRet ) + return nRet; + } + + return nStr1Len - nStr2Len; +} + +/* ----------------------------------------------------------------------- */ + +sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1, + const sal_Char* pStr2, + sal_Int32 nStrLen ) + SAL_THROW_EXTERN_C() +{ + const sal_Unicode* pStr1Run = pStr1+nStrLen; + const sal_Char* pStr2Run = pStr2+nStrLen; + while ( pStr1 < pStr1Run ) + { + pStr1Run--; + pStr2Run--; + if( *pStr1Run != (sal_Unicode)*pStr2Run ) + return sal_False; + } + + return sal_True; +} + +/* ----------------------------------------------------------------------- */ + +sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1, + const sal_Char* pStr2 ) + SAL_THROW_EXTERN_C() +{ + sal_Int32 nRet; + sal_Int32 c1; + sal_Int32 c2; + do + { + /* If character between 'A' and 'Z', than convert it to lowercase */ + c1 = (sal_Int32)*pStr1; + c2 = (sal_Int32)((unsigned char)*pStr2); + if ( (c1 >= 65) && (c1 <= 90) ) + c1 += 32; + if ( (c2 >= 65) && (c2 <= 90) ) + c2 += 32; + nRet = c1-c2; + if ( nRet != 0 ) + return nRet; + + pStr1++; + pStr2++; + } + while ( c2 ); + + return 0; +} + +/* ----------------------------------------------------------------------- */ + +sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, + sal_Int32 nStr1Len, + const sal_Char* pStr2 ) + SAL_THROW_EXTERN_C() +{ + sal_Int32 nRet; + sal_Int32 c1; + sal_Int32 c2; + do + { + if ( !nStr1Len ) + return *pStr2 == '\0' ? 0 : -1; + + /* If character between 'A' and 'Z', than convert it to lowercase */ + c1 = (sal_Int32)*pStr1; + c2 = (sal_Int32)((unsigned char)*pStr2); + if ( (c1 >= 65) && (c1 <= 90) ) + c1 += 32; + if ( (c2 >= 65) && (c2 <= 90) ) + c2 += 32; + nRet = c1-c2; + if ( nRet != 0 ) + return nRet; + + pStr1++; + pStr2++; + nStr1Len--; + } + while( c2 ); + + return 0; +} + +sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths( + sal_Unicode const * first, sal_Int32 firstLen, + char const * second, sal_Int32 secondLen) SAL_THROW_EXTERN_C() +{ + sal_Int32 i; + sal_Int32 len = firstLen < secondLen ? firstLen : secondLen; + for (i = 0; i < len; ++i) { + sal_Int32 c1 = *first++; + sal_Int32 c2 = (unsigned char) *second++; + sal_Int32 d; + if (c1 >= 65 && c1 <= 90) { + c1 += 32; + } + if (c2 >= 65 && c2 <= 90) { + c2 += 32; + } + d = c1 - c2; + if (d != 0) { + return d; + } + } + return firstLen - secondLen; +} + +/* ----------------------------------------------------------------------- */ + +sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, + sal_Int32 nStr1Len, + const sal_Char* pStr2, + sal_Int32 nShortenedLength ) + SAL_THROW_EXTERN_C() +{ + const sal_Unicode* pStr1End = pStr1 + nStr1Len; + sal_Int32 nRet; + sal_Int32 c1; + sal_Int32 c2; + while ( (nShortenedLength > 0) && + (pStr1 < pStr1End) && *pStr2 ) + { + /* Check ASCII range */ + OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); + + /* If character between 'A' and 'Z', than convert it to lowercase */ + c1 = (sal_Int32)*pStr1; + c2 = (sal_Int32)((unsigned char)*pStr2); + if ( (c1 >= 65) && (c1 <= 90) ) + c1 += 32; + if ( (c2 >= 65) && (c2 <= 90) ) + c2 += 32; + nRet = c1-c2; + if ( nRet != 0 ) + return nRet; + + nShortenedLength--; + pStr1++; + pStr2++; + } + + if ( nShortenedLength <= 0 ) + return 0; + + if ( *pStr2 ) + { + OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); + // first is a substring of the second string => less (negative value) + nRet = -1; + } + else + { + // greater or equal + nRet = pStr1End - pStr1; + } + + return nRet; +} + +/* ----------------------------------------------------------------------- */ + +void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis, + const sal_Char* pCharStr ) + SAL_THROW_EXTERN_C() +{ + sal_Int32 nLen; + + if ( pCharStr ) + { + const sal_Char* pTempStr = pCharStr; + while( *pTempStr ) + pTempStr++; + nLen = pTempStr-pCharStr; + } + else + nLen = 0; + + if ( !nLen ) + { + IMPL_RTL_STRINGNAME( new )( ppThis ); + return; + } + + if ( *ppThis ) + IMPL_RTL_STRINGNAME( release )( *ppThis ); + + *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); + OSL_ASSERT(*ppThis != NULL); + if ( (*ppThis) ) + { + IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer; + do + { + /* Check ASCII range */ + OSL_ENSURE( ((unsigned char)*pCharStr) <= 127, + "rtl_uString_newFromAscii() - Found ASCII char > 127" ); + + *pBuffer = *pCharStr; + pBuffer++; + pCharStr++; + } + while ( *pCharStr ); + } +} + +void SAL_CALL rtl_uString_newFromCodePoints( + rtl_uString ** newString, sal_uInt32 const * codePoints, + sal_Int32 codePointCount) SAL_THROW_EXTERN_C() +{ + sal_Int32 n; + sal_Int32 i; + sal_Unicode * p; + OSL_ASSERT( + newString != NULL && + (codePoints != NULL || codePointCount == 0) && + codePointCount >= 0); + if (codePointCount == 0) { + rtl_uString_new(newString); + return; + } + if (*newString != NULL) { + rtl_uString_release(*newString); + } + n = codePointCount; + for (i = 0; i < codePointCount; ++i) { + OSL_ASSERT(codePoints[i] <= 0x10FFFF); + if (codePoints[i] >= 0x10000) { + ++n; + } + } + /* Builds on the assumption that sal_Int32 uses 32 bit two's complement + representation with wrap around (the necessary number of UTF-16 code + units will be no larger than 2 * SAL_MAX_INT32, represented as + sal_Int32 -2): */ + if (n < 0) { + *newString = NULL; + return; + } + *newString = rtl_uString_ImplAlloc(n); + if (*newString == NULL) { + return; + } + p = (*newString)->buffer; + for (i = 0; i < codePointCount; ++i) { + sal_uInt32 c = codePoints[i]; + if (c < 0x10000) { + *p++ = (sal_Unicode) c; + } else { + c -= 0x10000; + *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE); + *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE); + } + } +} + +/* ======================================================================= */ + +static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen ) +{ + int n; + sal_uChar c; + const sal_Char* pEndStr; + + n = 0; + pEndStr = pStr+nLen; + while ( pStr < pEndStr ) + { + c = (sal_uChar)*pStr; + + if ( !(c & 0x80) ) + pStr++; + else if ( (c & 0xE0) == 0xC0 ) + pStr += 2; + else if ( (c & 0xF0) == 0xE0 ) + pStr += 3; + else if ( (c & 0xF8) == 0xF0 ) + pStr += 4; + else if ( (c & 0xFC) == 0xF8 ) + pStr += 5; + else if ( (c & 0xFE) == 0xFC ) + pStr += 6; + else + pStr++; + + n++; + } + + return n; +} + +/* ----------------------------------------------------------------------- */ + +static void rtl_string2UString_status( rtl_uString** ppThis, + const sal_Char* pStr, + sal_Int32 nLen, + rtl_TextEncoding eTextEncoding, + sal_uInt32 nCvtFlags, + sal_uInt32 *pInfo ) +{ + OSL_ENSURE(nLen == 0 || rtl_isOctetTextEncoding(eTextEncoding), + "rtl_string2UString_status() - Wrong TextEncoding" ); + + if ( !nLen ) + { + rtl_uString_new( ppThis ); + if (pInfo != NULL) { + *pInfo = 0; + } + } + else + { + if ( *ppThis ) + IMPL_RTL_STRINGNAME( release )( *ppThis ); + + /* Optimization for US-ASCII */ + if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) + { + IMPL_RTL_STRCODE* pBuffer; + *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); + if (*ppThis == NULL) { + if (pInfo != NULL) { + *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | + RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + } + return; + } + pBuffer = (*ppThis)->buffer; + do + { + /* Check ASCII range */ + OSL_ENSURE( ((unsigned char)*pStr) <= 127, + "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); + + *pBuffer = *pStr; + pBuffer++; + pStr++; + nLen--; + } + while ( nLen ); + if (pInfo != NULL) { + *pInfo = 0; + } + } + else + { + rtl_uString* pTemp; + rtl_uString* pTemp2 = NULL; + rtl_TextToUnicodeConverter hConverter; + sal_uInt32 nInfo; + sal_Size nSrcBytes; + sal_Size nDestChars; + sal_Size nNewLen; + + /* Optimization for UTF-8 - we try to calculate the exact length */ + /* For all other encoding we try the maximum - and reallocate + the buffer if needed */ + if ( eTextEncoding == RTL_TEXTENCODING_UTF8 ) + { + nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen ); + /* Includes the string only ASCII, then we could copy + the buffer faster */ + if ( nNewLen == (sal_Size)nLen ) + { + IMPL_RTL_STRCODE* pBuffer; + *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); + if (*ppThis == NULL) + { + if (pInfo != NULL) { + *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | + RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + } + return; + } + pBuffer = (*ppThis)->buffer; + do + { + /* Check ASCII range */ + OSL_ENSURE( ((unsigned char)*pStr) <= 127, + "rtl_string2UString_status() - UTF8 test encoding is wrong" ); + + *pBuffer = *pStr; + pBuffer++; + pStr++; + nLen--; + } + while ( nLen ); + if (pInfo != NULL) { + *pInfo = 0; + } + return; + } + } + else + nNewLen = nLen; + + nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH; + hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); + + pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); + if (pTemp == NULL) { + if (pInfo != NULL) { + *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | + RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + } + return; + } + nDestChars = rtl_convertTextToUnicode( hConverter, 0, + pStr, nLen, + pTemp->buffer, nNewLen, + nCvtFlags, + &nInfo, &nSrcBytes ); + + /* Buffer not big enough, try again with enough space */ + /* Shouldn't be the case, but if we get textencoding which + could results in more unicode characters we have this + code here. Could be the case for apple encodings */ + while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL ) + { + rtl_freeMemory( pTemp ); + nNewLen += 8; + pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); + if (pTemp == NULL) { + if (pInfo != NULL) { + *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | + RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + } + return; + } + nDestChars = rtl_convertTextToUnicode( hConverter, 0, + pStr, nLen, + pTemp->buffer, nNewLen, + nCvtFlags, + &nInfo, &nSrcBytes ); + } + + if (pInfo) + *pInfo = nInfo; + + /* Set the buffer to the correct size or if there is too + much overhead, reallocate to the correct size */ + if ( nNewLen > nDestChars+8 ) + { + pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars ); + } + if (pTemp2 != NULL) + { + rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars); + rtl_freeMemory(pTemp); + pTemp = pTemp2; + } + else + { + pTemp->length = nDestChars; + pTemp->buffer[nDestChars] = 0; + } + + rtl_destroyTextToUnicodeConverter( hConverter ); + *ppThis = pTemp; + + /* Results the conversion in an empty buffer - + create an empty string */ + if ( pTemp && !nDestChars ) + rtl_uString_new( ppThis ); + } + } +} + +void SAL_CALL rtl_string2UString( rtl_uString** ppThis, + const sal_Char* pStr, + sal_Int32 nLen, + rtl_TextEncoding eTextEncoding, + sal_uInt32 nCvtFlags ) SAL_THROW_EXTERN_C() +{ + rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding, + nCvtFlags, NULL ); +} + +/* ----------------------------------------------------------------------- */ + +enum StrLifecycle { + CANNOT_RETURN, + CAN_RETURN = 1 +}; + +static oslMutex +getInternMutex() +{ + static oslMutex pPoolGuard = NULL; + if( !pPoolGuard ) + { + oslMutex pGlobalGuard; + pGlobalGuard = *osl_getGlobalMutex(); + osl_acquireMutex( pGlobalGuard ); + if( !pPoolGuard ) + { + oslMutex p = osl_createMutex(); + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + pPoolGuard = p; + } + osl_releaseMutex( pGlobalGuard ); + } + else + { + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + } + + return pPoolGuard; +} + +/* returns true if we found a dup in the pool */ +static void rtl_ustring_intern_internal( rtl_uString ** newStr, + rtl_uString * str, + StrLifecycle can_return ) +{ + oslMutex pPoolMutex; + + pPoolMutex = getInternMutex(); + + osl_acquireMutex( pPoolMutex ); + + *newStr = rtl_str_hash_intern (str, can_return); + + osl_releaseMutex( pPoolMutex ); + + if( can_return && *newStr != str ) + { /* we dupped, then found a match */ + rtl_freeMemory( str ); + } +} + +void SAL_CALL rtl_uString_intern( rtl_uString ** newStr, + rtl_uString * str) SAL_THROW_EXTERN_C() +{ + if (SAL_STRING_IS_INTERN(str)) + { + IMPL_RTL_AQUIRE( str ); + *newStr = str; + } + else + { + rtl_uString *pOrg = *newStr; + *newStr = NULL; + rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN ); + if (pOrg) + rtl_uString_release (pOrg); + } +} + +static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding ) +{ + // FIXME: Maybe we should use a bit flag in the higher bits of the + // eTextEncoding value itself to determine the encoding type. But if we + // do, be sure to mask the value in certain places that expect the values + // to be numbered serially from 0 and up. One such place is + // Impl_getTextEncodingData(). + + switch ( eTextEncoding ) + { + // 1 to 1 (with no zero elements) + case RTL_TEXTENCODING_IBM_437: + case RTL_TEXTENCODING_IBM_850: + case RTL_TEXTENCODING_IBM_860: + case RTL_TEXTENCODING_IBM_861: + case RTL_TEXTENCODING_IBM_863: + case RTL_TEXTENCODING_IBM_865: + return len; + break; + } + return 0; +} + +void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr, + const sal_Char * str, + sal_Int32 len, + rtl_TextEncoding eTextEncoding, + sal_uInt32 convertFlags, + sal_uInt32 * pInfo ) + SAL_THROW_EXTERN_C() +{ + rtl_uString *scratch; + + if (*newStr) + { + rtl_uString_release (*newStr); + *newStr = NULL; + } + + if ( len < 256 ) + { // try various optimisations + sal_Int32 ulen; + if ( len < 0 ) + len = strlen( str ); + if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) + { + int i; + rtl_uString *pScratch; + pScratch = static_cast< rtl_uString * >( + alloca(sizeof (rtl_uString) + len * sizeof (IMPL_RTL_STRCODE))); + for (i = 0; i < len; i++) + { + /* Check ASCII range */ + OSL_ENSURE( ((unsigned char)str[i]) <= 127, + "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); + pScratch->buffer[i] = str[i]; + } + pScratch->length = len; + rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN ); + return; + } + else if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 ) + { + rtl_uString *pScratch; + rtl_TextToUnicodeConverter hConverter; + sal_Size nSrcBytes; + sal_uInt32 nInfo; + + pScratch = static_cast< rtl_uString * >( + alloca( + sizeof (rtl_uString) + ulen * sizeof (IMPL_RTL_STRCODE))); + + hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); + rtl_convertTextToUnicode( + hConverter, 0, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes ); + rtl_destroyTextToUnicodeConverter( hConverter ); + + if (pInfo) + *pInfo = nInfo; + + pScratch->length = ulen; + rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN ); + return; + } + + /* FIXME: we want a nice UTF-8 / alloca shortcut here */ + } + + scratch = NULL; + rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags, + pInfo ); + if (!scratch) { + return; + } + rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN ); +} + +static void +internRelease (rtl_uString *pThis) +{ + oslMutex pPoolMutex; + + rtl_uString *pFree = NULL; + if ( SAL_STRING_REFCOUNT( + osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0) + { + pPoolMutex = getInternMutex(); + osl_acquireMutex( pPoolMutex ); + + rtl_str_hash_remove (pThis); + + /* May have been separately acquired */ + if ( SAL_STRING_REFCOUNT( + osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 ) + { + /* we got the last ref */ + pFree = pThis; + } + else /* very unusual */ + { + internRelease (pThis); + } + + osl_releaseMutex( pPoolMutex ); + } + if (pFree) + rtl_freeMemory (pFree); +} + +sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( + rtl_uString const * string, sal_Int32 * indexUtf16, + sal_Int32 incrementCodePoints) +{ + sal_Int32 n; + sal_Unicode cu; + sal_uInt32 cp; + OSL_ASSERT(string != NULL && indexUtf16 != NULL); + n = *indexUtf16; + OSL_ASSERT(n >= 0 && n <= string->length); + while (incrementCodePoints < 0) { + OSL_ASSERT(n > 0); + cu = string->buffer[--n]; + if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 && + SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1])) + { + --n; + } + ++incrementCodePoints; + } + OSL_ASSERT(n >= 0 && n < string->length); + cu = string->buffer[n]; + if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 && + SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1])) + { + cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]); + } else { + cp = cu; + } + while (incrementCodePoints > 0) { + OSL_ASSERT(n < string->length); + cu = string->buffer[n++]; + if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length && + SAL_RTL_IS_LOW_SURROGATE(string->buffer[n])) + { + ++n; + } + --incrementCodePoints; + } + OSL_ASSERT(n >= 0 && n <= string->length); + *indexUtf16 = n; + return cp; +} + +sal_Bool rtl_convertStringToUString( + rtl_uString ** target, char const * source, sal_Int32 length, + rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C() +{ + sal_uInt32 info; + rtl_string2UString_status(target, source, length, encoding, flags, &info); + return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |