diff options
Diffstat (limited to 'sal/rtl/source/uri.cxx')
-rw-r--r-- | sal/rtl/source/uri.cxx | 799 |
1 files changed, 799 insertions, 0 deletions
diff --git a/sal/rtl/source/uri.cxx b/sal/rtl/source/uri.cxx new file mode 100644 index 000000000000..551c4f199251 --- /dev/null +++ b/sal/rtl/source/uri.cxx @@ -0,0 +1,799 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_sal.hxx" + +#include "rtl/uri.h" + +#include "surrogates.h" + +#include "osl/diagnose.h" +#include "rtl/strbuf.hxx" +#include "rtl/textenc.h" +#include "rtl/textcvt.h" +#include "rtl/uri.h" +#include "rtl/ustrbuf.h" +#include "rtl/ustrbuf.hxx" +#include "rtl/ustring.h" +#include "rtl/ustring.hxx" +#include "sal/types.h" + +#include <cstddef> + +namespace { + +std::size_t const nCharClassSize = 128; + +sal_Unicode const cEscapePrefix = 0x25; // '%' + +inline bool isDigit(sal_uInt32 nUtf32) +{ + return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9' +} + +inline bool isAlpha(sal_uInt32 nUtf32) +{ + // 'A'--'Z', 'a'--'z' + return ( + (nUtf32 >= 0x41 && nUtf32 <= 0x5A) || + (nUtf32 >= 0x61 && nUtf32 <= 0x7A) + ); +} + +inline bool isHighSurrogate(sal_uInt32 nUtf16) +{ + return SAL_RTL_IS_HIGH_SURROGATE(nUtf16); +} + +inline bool isLowSurrogate(sal_uInt32 nUtf16) +{ + return SAL_RTL_IS_LOW_SURROGATE(nUtf16); +} + +inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) +{ + return SAL_RTL_COMBINE_SURROGATES(high, low); +} + +inline int getHexWeight(sal_uInt32 nUtf32) +{ + return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9' + static_cast< int >(nUtf32 - 0x30) : + nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F' + static_cast< int >(nUtf32 - 0x41 + 10) : + nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f' + static_cast< int >(nUtf32 - 0x61 + 10) : + -1; // not a hex digit +} + +inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32) +{ + return nUtf32 < nCharClassSize && pCharClass[nUtf32]; +} + +inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity, + sal_Unicode cChar) +{ + rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1); +} + +enum EscapeType +{ + EscapeNo, + EscapeChar, + EscapeOctet +}; + +/* Read any of the following: + + - sequence of escape sequences representing character from eCharset, + translated to single UCS4 character; or + + - pair of UTF-16 surrogates, translated to single UCS4 character; or + + _ single UTF-16 character, extended to UCS4 character. + */ +sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, + bool bEncoded, rtl_TextEncoding eCharset, + EscapeType * pType) +{ + sal_uInt32 nChar = *(*pBegin)++; + int nWeight1; + int nWeight2; + if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2 + && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0 + && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0) + { + *pBegin += 2; + nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2); + if (nChar <= 0x7F) + *pType = EscapeChar; + else if (eCharset == RTL_TEXTENCODING_UTF8) + { + if (nChar >= 0xC0 && nChar <= 0xF4) + { + sal_uInt32 nEncoded; + int nShift; + sal_uInt32 nMin; + if (nChar <= 0xDF) + { + nEncoded = (nChar & 0x1F) << 6; + nShift = 0; + nMin = 0x80; + } + else if (nChar <= 0xEF) + { + nEncoded = (nChar & 0x0F) << 12; + nShift = 6; + nMin = 0x800; + } + else + { + nEncoded = (nChar & 0x07) << 18; + nShift = 12; + nMin = 0x10000; + } + sal_Unicode const * p = *pBegin; + bool bUTF8 = true; + for (; nShift >= 0; nShift -= 6) + { + if (pEnd - p < 3 || p[0] != cEscapePrefix + || (nWeight1 = getHexWeight(p[1])) < 8 + || nWeight1 > 11 + || (nWeight2 = getHexWeight(p[2])) < 0) + { + bUTF8 = sal_False; + break; + } + p += 3; + nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; + } + if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) + && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) + { + *pBegin = p; + *pType = EscapeChar; + return nEncoded; + } + } + *pType = EscapeOctet; + } + else + { + rtl::OStringBuffer aBuf; + aBuf.append(static_cast< char >(nChar)); + rtl_TextToUnicodeConverter aConverter + = rtl_createTextToUnicodeConverter(eCharset); + sal_Unicode const * p = *pBegin; + for (;;) + { + sal_Unicode aDst[2]; + sal_uInt32 nInfo; + sal_Size nConverted; + sal_Size nDstSize = rtl_convertTextToUnicode( + aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst, + sizeof aDst / sizeof aDst[0], + (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), + &nInfo, &nConverted); + if (nInfo == 0) + { + OSL_ASSERT( + nConverted + == sal::static_int_cast< sal_uInt32 >( + aBuf.getLength())); + rtl_destroyTextToUnicodeConverter(aConverter); + *pBegin = p; + *pType = EscapeChar; + OSL_ASSERT( + nDstSize == 1 + || (nDstSize == 2 && isHighSurrogate(aDst[0]) + && isLowSurrogate(aDst[1]))); + return nDstSize == 1 + ? aDst[0] : combineSurrogates(aDst[0], aDst[1]); + } + else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL + && pEnd - p >= 3 && p[0] == cEscapePrefix + && (nWeight1 = getHexWeight(p[1])) >= 0 + && (nWeight2 = getHexWeight(p[2])) >= 0) + { + p += 3; + aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2)); + } + else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL + && p != pEnd && *p <= 0x7F) + { + aBuf.append(static_cast< char >(*p++)); + } + else + { + OSL_ASSERT( + (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL) + == 0); + break; + } + } + rtl_destroyTextToUnicodeConverter(aConverter); + *pType = EscapeOctet; + } + return nChar; + } + else + { + *pType = EscapeNo; + return isHighSurrogate(nChar) && *pBegin < pEnd + && isLowSurrogate(**pBegin) ? + combineSurrogates(nChar, *(*pBegin)++) : nChar; + } +} + +void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) +{ + OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); + if (nUtf32 <= 0xFFFF) { + writeUnicode( + pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); + } else { + nUtf32 -= 0x10000; + writeUnicode( + pBuffer, pCapacity, + static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); + writeUnicode( + pBuffer, pCapacity, + static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); + } +} + +void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, + sal_uInt32 nOctet) +{ + OSL_ENSURE(nOctet <= 0xFF, "bad octet"); + + static sal_Unicode const aHex[16] + = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */ + + writeUnicode(pBuffer, pCapacity, cEscapePrefix); + writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]); + writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]); +} + +bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, + sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) +{ + OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); + if (eCharset == RTL_TEXTENCODING_UTF8) { + if (nUtf32 < 0x80) + writeEscapeOctet(pBuffer, pCapacity, nUtf32); + else if (nUtf32 < 0x800) + { + writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0); + writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); + } + else if (nUtf32 < 0x10000) + { + writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0); + writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); + writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); + } + else + { + writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0); + writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80); + writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); + writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); + } + } else { + rtl_UnicodeToTextConverter aConverter + = rtl_createUnicodeToTextConverter(eCharset); + sal_Unicode aSrc[2]; + sal_Size nSrcSize; + if (nUtf32 <= 0xFFFF) + { + aSrc[0] = static_cast< sal_Unicode >(nUtf32); + nSrcSize = 1; + } + else + { + aSrc[0] = static_cast< sal_Unicode >( + ((nUtf32 - 0x10000) >> 10) | 0xD800); + aSrc[1] = static_cast< sal_Unicode >( + ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00); + nSrcSize = 2; + } + sal_Char aDst[32]; // FIXME random value + sal_uInt32 nInfo; + sal_Size nConverted; + sal_Size nDstSize = rtl_convertUnicodeToText( + aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst, + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR + | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR + | RTL_UNICODETOTEXT_FLAGS_FLUSH, + &nInfo, &nConverted); + OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0); + rtl_destroyUnicodeToTextConverter(aConverter); + if (nInfo == 0) { + OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText"); + for (sal_Size i = 0; i < nDstSize; ++i) + writeEscapeOctet(pBuffer, pCapacity, + static_cast< unsigned char >(aDst[i])); + // FIXME all octets are escaped, even if there is no need + } else { + if (bStrict) { + return false; + } else { + writeUcs4(pBuffer, pCapacity, nUtf32); + } + } + } + return true; +} + +struct Component +{ + sal_Unicode const * pBegin; + sal_Unicode const * pEnd; + + inline Component(): pBegin(0) {} + + inline bool isPresent() const { return pBegin != 0; } + + inline sal_Int32 getLength() const; +}; + +inline sal_Int32 Component::getLength() const +{ + OSL_ENSURE(isPresent(), "taking length of non-present component"); + return static_cast< sal_Int32 >(pEnd - pBegin); +} + +struct Components +{ + Component aScheme; + Component aAuthority; + Component aPath; + Component aQuery; + Component aFragment; +}; + +void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) +{ + // This algorithm is liberal and accepts various forms of illegal input. + + sal_Unicode const * pBegin = pUriRef->buffer; + sal_Unicode const * pEnd = pBegin + pUriRef->length; + sal_Unicode const * pPos = pBegin; + + if (pPos != pEnd && isAlpha(*pPos)) + for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p) + if (*p == ':') + { + pComponents->aScheme.pBegin = pBegin; + pComponents->aScheme.pEnd = ++p; + pPos = p; + break; + } + else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-' + && *p != '.') + break; + + if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') + { + pComponents->aAuthority.pBegin = pPos; + pPos += 2; + while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#') + ++pPos; + pComponents->aAuthority.pEnd = pPos; + } + + pComponents->aPath.pBegin = pPos; + while (pPos != pEnd && *pPos != '?' && * pPos != '#') + ++pPos; + pComponents->aPath.pEnd = pPos; + + if (pPos != pEnd && *pPos == '?') + { + pComponents->aQuery.pBegin = pPos++; + while (pPos != pEnd && * pPos != '#') + ++pPos; + pComponents->aQuery.pEnd = pPos; + } + + if (pPos != pEnd) + { + OSL_ASSERT(*pPos == '#'); + pComponents->aFragment.pBegin = pPos; + pComponents->aFragment.pEnd = pEnd; + } +} + +rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) +{ + OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/'); + OSL_ASSERT(rRelPath.isPresent()); + + // The invariant of aBuffer is that it always starts and ends with a slash + // (until probably right at the end of the algorithm, when the last segment + // of rRelPath is added, which does not necessarily end in a slash): + rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength()); + // XXX numeric overflow + + // Segments "." and ".." within rBasePath are not conisdered special (but + // are also not removed by ".." segments within rRelPath), RFC 2396 seems a + // bit unclear about this point: + sal_Int32 nFixed = 1; + sal_Unicode const * p = rBasePath.pBegin + 1; + for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q) + if (*q == '/') + { + if ( + (q - p == 1 && p[0] == '.') || + (q - p == 2 && p[0] == '.' && p[1] == '.') + ) + { + nFixed = q + 1 - rBasePath.pBegin; + } + p = q + 1; + } + aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); + + p = rRelPath.pBegin; + if (p != rRelPath.pEnd) + for (;;) + { + sal_Unicode const * q = p; + sal_Unicode const * r; + for (;;) + { + if (q == rRelPath.pEnd) + { + r = q; + break; + } + if (*q == '/') + { + r = q + 1; + break; + } + ++q; + } + if (q - p == 2 && p[0] == '.' && p[1] == '.') + { + // Erroneous excess segments ".." within rRelPath are left + // intact, as the examples in RFC 2396, section C.2, suggest: + sal_Int32 i = aBuffer.getLength() - 1; + if (i < nFixed) + { + aBuffer.append(p, r - p); + nFixed += 3; + } + else + { + while (aBuffer.charAt(i - 1) != '/') + --i; + aBuffer.setLength(i); + } + } + else if (q - p != 1 || *p != '.') + aBuffer.append(p, r - p); + if (q == rRelPath.pEnd) + break; + p = q + 1; + } + + return aBuffer.makeStringAndClear(); +} + +} + +sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass) + SAL_THROW_EXTERN_C() +{ + static sal_Bool const aCharClass[][nCharClassSize] + = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ + }}; + OSL_ENSURE( + (eCharClass >= 0 + && (sal::static_int_cast< std::size_t >(eCharClass) + < sizeof aCharClass / sizeof aCharClass[0])), + "bad eCharClass"); + return aCharClass[eCharClass]; +} + +void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass, + rtl_UriEncodeMechanism eMechanism, + rtl_TextEncoding eCharset, rtl_uString ** pResult) + SAL_THROW_EXTERN_C() +{ + OSL_ENSURE(!pCharClass[0x25], "bad pCharClass"); + // make sure the percent sign is encoded... + + sal_Unicode const * p = pText->buffer; + sal_Unicode const * pEnd = p + pText->length; + sal_Int32 nCapacity = 0; + rtl_uString_new(pResult); + while (p < pEnd) + { + EscapeType eType; + sal_uInt32 nUtf32 = readUcs4( + &p, pEnd, + (eMechanism == rtl_UriEncodeKeepEscapes + || eMechanism == rtl_UriEncodeCheckEscapes + || eMechanism == rtl_UriEncodeStrictKeepEscapes), + eCharset, &eType); + switch (eType) + { + case EscapeNo: + if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F + writeUnicode(pResult, &nCapacity, + static_cast< sal_Unicode >(nUtf32)); + else if (!writeEscapeChar( + pResult, &nCapacity, nUtf32, eCharset, + (eMechanism == rtl_UriEncodeStrict + || eMechanism == rtl_UriEncodeStrictKeepEscapes))) + { + rtl_uString_new(pResult); + return; + } + break; + + case EscapeChar: + if (eMechanism == rtl_UriEncodeCheckEscapes + && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F + writeUnicode(pResult, &nCapacity, + static_cast< sal_Unicode >(nUtf32)); + else if (!writeEscapeChar( + pResult, &nCapacity, nUtf32, eCharset, + (eMechanism == rtl_UriEncodeStrict + || eMechanism == rtl_UriEncodeStrictKeepEscapes))) + { + rtl_uString_new(pResult); + return; + } + break; + + case EscapeOctet: + writeEscapeOctet(pResult, &nCapacity, nUtf32); + break; + } + } +} + +void SAL_CALL rtl_uriDecode(rtl_uString * pText, + rtl_UriDecodeMechanism eMechanism, + rtl_TextEncoding eCharset, rtl_uString ** pResult) + SAL_THROW_EXTERN_C() +{ + switch (eMechanism) + { + case rtl_UriDecodeNone: + rtl_uString_assign(pResult, pText); + break; + + case rtl_UriDecodeToIuri: + eCharset = RTL_TEXTENCODING_UTF8; + default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict + { + sal_Unicode const * p = pText->buffer; + sal_Unicode const * pEnd = p + pText->length; + sal_Int32 nCapacity = 0; + rtl_uString_new(pResult); + while (p < pEnd) + { + EscapeType eType; + sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); + switch (eType) + { + case EscapeChar: + if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) + { + writeEscapeOctet(pResult, &nCapacity, nUtf32); + break; + } + case EscapeNo: + writeUcs4(pResult, &nCapacity, nUtf32); + break; + + case EscapeOctet: + if (eMechanism == rtl_UriDecodeStrict) { + rtl_uString_new(pResult); + return; + } + writeEscapeOctet(pResult, &nCapacity, nUtf32); + break; + } + } + } + break; + } +} + +sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, + rtl_uString * pRelUriRef, + rtl_uString ** pResult, + rtl_uString ** pException) + SAL_THROW_EXTERN_C() +{ + // If pRelUriRef starts with a scheme component it is an absolute URI + // reference, and we are done (i.e., this algorithm does not support + // backwards-compatible relative URIs starting with a scheme component, see + // RFC 2396, section 5.2, step 3): + Components aRelComponents; + parseUriRef(pRelUriRef, &aRelComponents); + if (aRelComponents.aScheme.isPresent()) + { + rtl_uString_assign(pResult, pRelUriRef); + return true; + } + + // Parse pBaseUriRef; if the scheme component is not present or not valid, + // or the path component is not empty and starts with anything but a slash, + // an exception is raised: + Components aBaseComponents; + parseUriRef(pBaseUriRef, &aBaseComponents); + if (!aBaseComponents.aScheme.isPresent()) + { + rtl::OUString aMessage(pBaseUriRef); + aMessage += rtl::OUString( + RTL_CONSTASCII_USTRINGPARAM( + " does not start with a scheme component")); + rtl_uString_assign(pException, + const_cast< rtl::OUString & >(aMessage).pData); + return false; + } + if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd + && *aBaseComponents.aPath.pBegin != '/') + { + rtl::OUString aMessage(pBaseUriRef); + aMessage += rtl::OUString( + RTL_CONSTASCII_USTRINGPARAM( + "path component does not start with slash")); + rtl_uString_assign(pException, aMessage.pData); + return false; + } + + // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI + // into an absolute one (if the relative URI is a reference to the "current + // document," the "current document" is here taken to be the base URI): + rtl::OUStringBuffer aBuffer; + aBuffer.append(aBaseComponents.aScheme.pBegin, + aBaseComponents.aScheme.getLength()); + if (aRelComponents.aAuthority.isPresent()) + { + aBuffer.append(aRelComponents.aAuthority.pBegin, + aRelComponents.aAuthority.getLength()); + aBuffer.append(aRelComponents.aPath.pBegin, + aRelComponents.aPath.getLength()); + if (aRelComponents.aQuery.isPresent()) + aBuffer.append(aRelComponents.aQuery.pBegin, + aRelComponents.aQuery.getLength()); + } + else + { + if (aBaseComponents.aAuthority.isPresent()) + aBuffer.append(aBaseComponents.aAuthority.pBegin, + aBaseComponents.aAuthority.getLength()); + if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd + && !aRelComponents.aQuery.isPresent()) + { + aBuffer.append(aBaseComponents.aPath.pBegin, + aBaseComponents.aPath.getLength()); + if (aBaseComponents.aQuery.isPresent()) + aBuffer.append(aBaseComponents.aQuery.pBegin, + aBaseComponents.aQuery.getLength()); + } + else + { + if (*aRelComponents.aPath.pBegin == '/') + aBuffer.append(aRelComponents.aPath.pBegin, + aRelComponents.aPath.getLength()); + else + aBuffer.append(joinPaths(aBaseComponents.aPath, + aRelComponents.aPath)); + if (aRelComponents.aQuery.isPresent()) + aBuffer.append(aRelComponents.aQuery.pBegin, + aRelComponents.aQuery.getLength()); + } + } + if (aRelComponents.aFragment.isPresent()) + aBuffer.append(aRelComponents.aFragment.pBegin, + aRelComponents.aFragment.getLength()); + rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData); + return true; +} |