summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Stahl <mstahl@redhat.com> 2014-06-02 23:57:13 +0200
committer Miklos Vajna <vmiklos@collabora.co.uk> 2014-06-04 07:35:07 +0000
commit d15eb9f09c8854bd58fecd3dc6a31fa678e392a1 (patch)
tree 19f8b99fd9f7fa01283599b66ab7ea74c9d3b793
parent 1fd9e8e1bdf1ddf32b21d12e3793cb7a2c53e1a1 (diff)
fdo#79384: RTF import: fix literal Shift-JIS text
This is a variable-length encoding, and the second byte may be an RTF syntax character like \, {, }. (cherry picked from commit 061190a62fcdbfb3a0b266d5afffbd257a3e692e) Conflicts: writerfilter/source/rtftok/rtfdocumentimpl.cxx writerfilter/source/rtftok/rtfdocumentimpl.hxx Change-Id: I813ccafda18388af3bf05eb7ce9a0253c627b1c4 Reviewed-on: https://gerrit.libreoffice.org/9632 Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk> Tested-by: Miklos Vajna <vmiklos@collabora.co.uk>
-rw-r--r-- sw/qa/extras/rtfimport/data/fdo79384.rtf 9
-rw-r--r-- sw/qa/extras/rtfimport/rtfimport.cxx 8
-rw-r--r-- writerfilter/source/rtftok/rtfdocumentimpl.cxx 34
-rw-r--r-- writerfilter/source/rtftok/rtfdocumentimpl.hxx 2
4 files changed, 48 insertions, 5 deletions
diff --git a/sw/qa/extras/rtfimport/data/fdo79384.rtf b/sw/qa/extras/rtfimport/data/fdo79384.rtf
new file mode 100644
index 000000000000..2a900852861d
--- /dev/null
+++ b/sw/qa/extras/rtfimport/data/fdo79384.rtf
@@ -0,0 +1,9 @@
+{\rtf1\ansi
+{\fonttbl{\f5\fnil\fprq0\fcharset128 OpenSymbol{\*\falt Arial Unicode MS};}}
+
+\pard\plain
+
+\dbch\f5 „M„p„‚„{„u„‚„ „ƒ„„y„ƒ„{„p
+„}„\
+
+\par }
diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx
index 562b9dfe6e90..e8f40c85ef37 100644
--- a/sw/qa/extras/rtfimport/rtfimport.cxx
+++ b/sw/qa/extras/rtfimport/rtfimport.cxx
@@ -286,6 +286,14 @@ DECLARE_RTFIMPORT_TEST(testN751020, "n751020.rtf")
CPPUNIT_ASSERT_EQUAL(sal_Int32(TWIP_TO_MM100(200)), getProperty<sal_Int32>(xParaEnum->nextElement(), "ParaBottomMargin"));
}
+DECLARE_RTFIMPORT_TEST(testFdo79384, "fdo79384.rtf")
+{
+ uno::Reference<text::XTextRange> xTextRange = getRun(getParagraph(1), 1);
+
+ CPPUNIT_ASSERT_EQUAL(OUString("Маркеры спискамЫ", 31, RTL_TEXTENCODING_UTF8),
+ xTextRange->getString());
+}
+
DECLARE_RTFIMPORT_TEST(testFdo47326, "fdo47326.rtf")
{
// This was 15 only, as \super buffered text, then the contents of it got lost.
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index e8316ffd0156..9946de20fd17 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -963,9 +963,33 @@ int RTFDocumentImpl::resolveChars(char ch)
m_aStates.top().nCharsToSkip--;
}
}
+
// read a single char if we're in hex mode
if (m_aStates.top().nInternalState == INTERNAL_HEX)
break;
+
+ if (RTFParserState::DBCH == m_aStates.top().eRunType &&
+ RTL_TEXTENCODING_MS_932 == m_aStates.top().nCurrentEncoding)
+ {
+ unsigned char uch = ch;
+ if ((uch >= 0x80 && uch <= 0x9F) || uch >= 0xE0)
+ {
+ // read second byte of 2-byte Shift-JIS - may be \ { }
+ Strm() >> ch;
+ if (m_aStates.top().nCharsToSkip == 0)
+ {
+ assert(bUnicodeChecked);
+ aBuf.append(ch);
+ }
+ else
+ {
+ assert(bSkipped);
+ // anybody who uses \ucN with Shift-JIS is insane
+ m_aStates.top().nCharsToSkip--;
+ }
+ }
+ }
+
Strm() >> ch;
}
if (m_aStates.top().nInternalState != INTERNAL_HEX && !Strm().IsEof())
@@ -2747,12 +2771,13 @@ int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword)
break;
case RTF_LOCH:
// Noop, dmapper detects this automatically.
+ m_aStates.top().eRunType = RTFParserState::LOCH;
break;
case RTF_HICH:
- m_aStates.top().bIsCjk = true;
+ m_aStates.top().eRunType = RTFParserState::HICH;
break;
case RTF_DBCH:
- m_aStates.top().bIsCjk = false;
+ m_aStates.top().eRunType = RTFParserState::DBCH;
break;
case RTF_TITLEPG:
{
@@ -3165,7 +3190,8 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
if (nKeyword == RTF_F)
nSprm = NS_sprm::LN_CRgFtc0;
else
- nSprm = (m_aStates.top().bIsCjk ? NS_sprm::LN_CRgFtc1 : NS_sprm::LN_CRgFtc2);
+ nSprm = (m_aStates.top().eRunType == RTFParserState::HICH
+ ? NS_sprm::LN_CRgFtc1 : NS_sprm::LN_CRgFtc2);
if (m_aStates.top().nDestinationState == DESTINATION_FONTTABLE || m_aStates.top().nDestinationState == DESTINATION_FONTENTRY)
{
m_aFontIndexes.push_back(nParam);
@@ -5171,7 +5197,7 @@ RTFParserState::RTFParserState(RTFDocumentImpl *pDocumentImpl)
aShape(),
aDrawingObject(),
aFrame(this),
- bIsCjk(false),
+ eRunType(LOCH),
nYear(0),
nMonth(0),
nDay(0),
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 1d10b6f7047e..a6e44067ea30 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -254,7 +254,7 @@ namespace writerfilter {
RTFFrame aFrame;
/// CJK or CTL?
- bool bIsCjk;
+ enum { LOCH, HICH, DBCH } eRunType;
// Info group.
int nYear;