summaryrefslogtreecommitdiff
path: root/include/rtl/character.hxx
diff options
context:
space:
mode:
authorMark Hung <marklh9@gmail.com>2015-12-27 00:46:49 +0800
committerMark Hung <marklh9@gmail.com>2016-02-13 08:05:09 +0000
commit4647e778993250b8c9431e2890750916fb986ecc (patch)
tree99d285ec6a33aeca2d9df32d30d2aea801066a37 /include/rtl/character.hxx
parent3596613153289dae204b5abdc7446b303021f597 (diff)
tdf#81129 Support reading non-BMP characters in HTML documents.
1. Allow character entity ( &#nnnn; ) to exceed 0xffff in HTMLParser::ScanText() 2. Return a character as sal_uInt32 ( utf32 ) instead of sal_Unicode ( utf16 ) from SvParser::GetNextChar(). Conflicts: sw/qa/extras/htmlexport/htmlexport.cxx Change-Id: Ida455040970fae800f0f11471b27f53461fb78e4 Reviewed-on: https://gerrit.libreoffice.org/21152 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Mark Hung <marklh9@gmail.com>
Diffstat (limited to 'include/rtl/character.hxx')
-rw-r--r--include/rtl/character.hxx13
1 files changed, 13 insertions, 0 deletions
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index a3d09b9b0df7..49f6803821de 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -222,6 +222,19 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
}
/// @endcond
+/** Check whether a value does not exceed the largest code point, 0x10FFFF.
+
+ @param code The value to test; BMP and non-BMP values are both accepted.
+
+ @return True if code <= 0x10FFFF. Surrogate values are not rejected.
+
+ @since LibreOffice 5.2
+*/
+inline bool isValidCodePoint( sal_uInt32 code)
+{
+ return code <= 0x10FFFF;
+}
+
/** Check for high surrogate.
@param code A Unicode code point.