diff options
Diffstat (limited to 'sal/textenc/convertsinglebytetobmpunicode.cxx')
-rw-r--r-- | sal/textenc/convertsinglebytetobmpunicode.cxx | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/sal/textenc/convertsinglebytetobmpunicode.cxx b/sal/textenc/convertsinglebytetobmpunicode.cxx new file mode 100644 index 000000000000..1e482249907c --- /dev/null +++ b/sal/textenc/convertsinglebytetobmpunicode.cxx @@ -0,0 +1,211 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_sal.hxx" + +#include "context.h" +#include "converter.h" +#include "convertsinglebytetobmpunicode.hxx" +#include "unichars.h" + +#include "osl/diagnose.h" +#include "rtl/textcvt.h" +#include "sal/types.h" + +#include <cstddef> + +sal_Size rtl_textenc_convertSingleByteToBmpUnicode( + ImplTextConverterData const * data, void *, sal_Char const * srcBuf, + sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars, + sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes) +{ + sal_Unicode const * map = static_cast< + rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( + data)->byteToUnicode; + sal_uInt32 infoFlags = 0; + sal_Size converted = 0; + sal_Unicode * destBufPtr = destBuf; + sal_Unicode * destBufEnd = destBuf + destChars; + for (; converted < srcBytes; ++converted) { + bool undefined = true; + sal_Char b = *srcBuf++; + sal_Unicode c = map[static_cast< sal_uInt8 >(b)]; + if (c == 0xFFFF) { + goto bad_input; + } + if (destBufEnd - destBufPtr < 1) { + goto no_output; + } + *destBufPtr++ = c; + continue; + bad_input: + switch (ImplHandleBadInputTextToUnicodeConversion( + undefined, false, b, flags, &destBufPtr, destBufEnd, + &infoFlags)) + { + case IMPL_BAD_INPUT_STOP: + break; + + case IMPL_BAD_INPUT_CONTINUE: + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + no_output: + --srcBuf; + infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; + break; + } + if (info != 0) { + *info = infoFlags; + } + if (srcCvtBytes != 0) { + *srcCvtBytes = converted; + } + return destBufPtr - destBuf; +} + +sal_Size rtl_textenc_convertBmpUnicodeToSingleByte( + ImplTextConverterData const * data, void * context, + sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf, + sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info, + sal_Size * srcCvtChars) +{ + std::size_t entries = static_cast< + rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( + data)->unicodeToByteEntries; + rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast< + rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( + data)->unicodeToByte; + sal_Unicode highSurrogate = 0; + sal_uInt32 infoFlags = 0; + sal_Size converted = 0; + sal_Char * destBufPtr = destBuf; + sal_Char * destBufEnd = destBuf + destBytes; + if (context != 0) { + highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)-> + m_nHighSurrogate; + } + for (; converted < srcChars; ++converted) { + bool undefined = true; + sal_uInt32 c = *srcBuf++; + if (highSurrogate == 0) { + if (ImplIsHighSurrogate(c)) { + highSurrogate = static_cast< sal_Unicode >(c); + continue; + } + } else if (ImplIsLowSurrogate(c)) { + c = ImplCombineSurrogates(highSurrogate, c); + } else { + undefined = false; + goto bad_input; + } + if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) { + undefined = false; + goto bad_input; + } + // Linearly searching through the ranges if probably fastest, assuming + // that most converted characters belong to the ASCII subset: + for (std::size_t i = 0; i < entries; ++i) { + if (c < ranges[i].unicode) { + break; + } else if (c <= sal::static_int_cast< sal_uInt32 >( + ranges[i].unicode + ranges[i].range)) + { + if (destBufEnd - destBufPtr < 1) { + goto no_output; + } + *destBufPtr++ = static_cast< sal_Char >( + ranges[i].byte + (c - ranges[i].unicode)); + goto done; + } + } + goto bad_input; + done: + highSurrogate = 0; + continue; + bad_input: + switch (ImplHandleBadInputUnicodeToTextConversion( + undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0, + 0, 0)) + { + case IMPL_BAD_INPUT_STOP: + highSurrogate = 0; + break; + + case IMPL_BAD_INPUT_CONTINUE: + highSurrogate = 0; + continue; + + case IMPL_BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + no_output: + --srcBuf; + infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + if (highSurrogate != 0 + && ((infoFlags + & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0)) + { + if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) { + infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + } else { + switch (ImplHandleBadInputUnicodeToTextConversion( + false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0, + 0, 0)) + { + case IMPL_BAD_INPUT_STOP: + case IMPL_BAD_INPUT_CONTINUE: + highSurrogate = 0; + break; + + case IMPL_BAD_INPUT_NO_OUTPUT: + infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + } + if (context != 0) { + static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate + = highSurrogate; + } + if (info != 0) { + *info = infoFlags; + } + if (srcCvtChars != 0) { + *srcCvtChars = converted; + } + return destBufPtr - destBuf; +} |