summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Stahl <mstahl@redhat.com>2014-06-12 12:16:28 +0200
committerCaolán McNamara <caolanm@redhat.com>2014-06-12 13:48:22 +0000
commit8f80cde668b50c939aeff0d98a3e67c362df1fd4 (patch)
tree6f0d399f80dbd4d0449caf00d0cb868926b62628
parent80b6b175f438dd56f6c107d956e6c00876ff3dba (diff)
fdo#77979: sw: RTF export: write non-ASCII font names encoded
Currently font names like "微软雅黑" (Microsoft YaHei) are written as "????" in the RTF export; to avoid that, set the \fcharset of the font entry to something that at least is able to encode the font name and alternate name. This requires a new function since the existing rtl_TextEncodingToWinCharset was changed in b88fe998ce8c80d7629fe70118311096615d959d to return "default" 0x01 (for OOXML) which is quite unhelpful for RTF. This is not entirely satisfactory, as of course that is no guarantee that the encoding can represent all of the actual text that has the font applied; hence there are some \'3f in the fall-back encoded text of the heading of the bugdoc, which indicates that the detected Shift-JIS is insufficient and GB-2312 would be required; but it's not obvious how to do better here without iterating over all the text twice, and that still leaves the possibility that all text that has a particular font applied cannot be represented by a single non-Unicode encoding. But since we always write text as the \u Unicode + legacy fall-back, this should not be a big problem since modern RTF readers will simply read the Unicode. (cherry picked from commit e47a02b1524061143d8e77a54eb95c77f2e6dae2) fdo#77979: argh forgot to add the test document (cherry picked from commit 90b2b378aecfa1914be0ce9aa7aa4e006e225e96) (cherry picked from commit 276fb59ee66806709382d0eeef20f62a094a5995) error C2361: initialization of 's_fallbacks' is skipped by 'default' (cherry picked from commit f3695bbc17a547b547876dd1175c0b74e5b3e90e) Conflicts: sw/source/filter/ww8/rtfattributeoutput.cxx Spell out UTF-8 (cherry picked from commit 809c8d4c990e77cd8ba52672be7c5e77aae4e90e) Change-Id: Ie6a42294c501d014dd9f0df82638519412ca19bb Reviewed-on: https://gerrit.libreoffice.org/9750 Reviewed-by: Caolán McNamara <caolanm@redhat.com> Tested-by: Caolán McNamara <caolanm@redhat.com>
-rw-r--r--sw/qa/extras/rtfexport/data/fdo77979.odtbin0 -> 9009 bytes
-rw-r--r--sw/qa/extras/rtfexport/rtfexport.cxx7
-rw-r--r--sw/source/filter/inc/msfilter.hxx12
-rw-r--r--sw/source/filter/ww8/rtfattributeoutput.cxx20
-rw-r--r--sw/source/filter/ww8/writerwordglue.cxx49
-rw-r--r--sw/source/filter/ww8/wrtw8sty.cxx3
6 files changed, 85 insertions, 6 deletions
diff --git a/sw/qa/extras/rtfexport/data/fdo77979.odt b/sw/qa/extras/rtfexport/data/fdo77979.odt
new file mode 100644
index 000000000000..532724c8565b
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/fdo77979.odt
Binary files differ
diff --git a/sw/qa/extras/rtfexport/rtfexport.cxx b/sw/qa/extras/rtfexport/rtfexport.cxx
index d58e20088b46..a3fb2cb29899 100644
--- a/sw/qa/extras/rtfexport/rtfexport.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport.cxx
@@ -306,6 +306,13 @@ DECLARE_RTFEXPORT_TEST(testMathRuns, "math-runs.rtf")
CPPUNIT_ASSERT_EQUAL(OUString("\\{ left [ right ] left ( right ) \\}"), getFormula(getRun(getParagraph(1), 1)));
}
+DECLARE_RTFEXPORT_TEST(testFdo77979, "fdo77979.odt")
+{
+    // The font name must be written encoded with the \fcharset of the font,
+    // so that the non-ASCII name is still intact on the first run of the
+    // first paragraph.
+    // Expected name: U+5FAE U+8F6F U+96C5 U+9ED1, spelled as 12 UTF-8 bytes.
+    OUString const aExpectedName(
+        "\xE5\xBE\xAE\xE8\xBD\xAF\xE9\x9B\x85\xE9\xBB\x91", 12, RTL_TEXTENCODING_UTF8);
+    OUString const aActualName =
+        getProperty<OUString>(getRun(getParagraph(1), 1), "CharFontName");
+    CPPUNIT_ASSERT_EQUAL(aExpectedName, aActualName);
+}
+
DECLARE_RTFEXPORT_TEST(testFdo53113, "fdo53113.odt")
{
/*
diff --git a/sw/source/filter/inc/msfilter.hxx b/sw/source/filter/inc/msfilter.hxx
index 30a5997ac6a1..0575252ffe0c 100644
--- a/sw/source/filter/inc/msfilter.hxx
+++ b/sw/source/filter/inc/msfilter.hxx
@@ -59,8 +59,7 @@ namespace sw
{
/** MSOffice appears to set the charset of unicode fonts to MS 932
- Arial Unicode MS for example is a unicode font, but word sets
- exported uses of it to the MS 932 charset
+ But we do "default", whatever that means.
@param eTextEncoding
the OOo encoding to convert from
@@ -73,6 +72,15 @@ namespace sw
*/
sal_uInt8 rtl_TextEncodingToWinCharset(rtl_TextEncoding eTextEncoding);
+ /** Choose the Windows charset byte to use for a font in RTF export.
+
+ MSOffice appears to set the charset of unicode fonts to MS 932
+
+ Arial Unicode MS for example is a unicode font, but word sets
+ exported uses of it to the MS 932 charset
+
+ For an unknown or Unicode text encoding, the result is the first of
+ the MS 932, MS 936, MS 950 and MS 949 charsets (tried in that order)
+ that is able to encode both font names, or 0x01 ("default") if none
+ of them can. For every other text encoding the result is whatever
+ rtl_getBestWindowsCharsetFromTextEncoding() yields.
+
+ @param rFontName
+ the primary name of the font, which must be encodable in the result
+
+ @param rAltName
+ the alternate name of the font, which must be encodable as well
+
+ @param eTextEncoding
+ the text encoding of the font to convert from
+
+ @return
+ the Windows charset value chosen for the font
+
+ */
+ sal_uInt8 rtl_TextEncodingToWinCharsetRTF(OUString const& rFontName,
+ OUString const& rAltName, rtl_TextEncoding eTextEncoding);
+
/** Import a MSWord XE field. Suitable for .doc and .rtf
@param rDoc
diff --git a/sw/source/filter/ww8/rtfattributeoutput.cxx b/sw/source/filter/ww8/rtfattributeoutput.cxx
index 3cf3b6982e20..5844c0d46cc8 100644
--- a/sw/source/filter/ww8/rtfattributeoutput.cxx
+++ b/sw/source/filter/ww8/rtfattributeoutput.cxx
@@ -2016,7 +2016,12 @@ void RtfAttributeOutput::CharFont( const SvxFontItem& rFont)
m_aStylesEnd.append(OOO_STRING_SVTOOLS_RTF_LOCH);
m_aStylesEnd.append(OOO_STRING_SVTOOLS_RTF_F);
m_aStylesEnd.append((sal_Int32)m_rExport.maFontHelper.GetId(rFont));
- m_rExport.eCurrentEncoding = rtl_getTextEncodingFromWindowsCharset(rtl_getBestWindowsCharsetFromTextEncoding(rFont.GetCharSet()));
+ // FIXME: this may be a tad expensive... but the charset needs to be
+ // consistent with what wwFont::WriteRtf() does
+ FontMapExport aTmp(rFont.GetFamilyName());
+ m_rExport.eCurrentEncoding = rtl_getTextEncodingFromWindowsCharset(
+ sw::ms::rtl_TextEncodingToWinCharsetRTF(
+ aTmp.msPrimary, aTmp.msSecondary, rFont.GetCharSet()));
}
void RtfAttributeOutput::CharFontSize( const SvxFontHeightItem& rFontSize)
@@ -3274,7 +3279,10 @@ void RtfAttributeOutput::StartFont( const OUString& rFamilyName ) const
{
SAL_INFO("sw.rtf", OSL_THIS_FUNC);
- m_rExport.Strm() << OUStringToOString( rFamilyName, m_rExport.eCurrentEncoding ).getStr();
+ // write the font name hex-encoded, but without Unicode - Word at least
+ // cannot read *both* Unicode and fallback as written by OutString
+ m_rExport.Strm() <<
+ msfilter::rtfutil::OutString(rFamilyName, m_rExport.eCurrentEncoding, false).getStr();
}
/// End the font.
@@ -3283,6 +3291,7 @@ void RtfAttributeOutput::EndFont() const
SAL_INFO("sw.rtf", OSL_THIS_FUNC);
m_rExport.Strm() << ";}";
+ m_rExport.eCurrentEncoding = m_rExport.eDefaultEncoding;
}
/// Alternate name for the font.
@@ -3291,7 +3300,11 @@ void RtfAttributeOutput::FontAlternateName( const OUString& rName ) const
SAL_INFO("sw.rtf", OSL_THIS_FUNC);
m_rExport.Strm() << '{' << OOO_STRING_SVTOOLS_RTF_IGNORE << OOO_STRING_SVTOOLS_RTF_FALT << ' ';
- m_rExport.Strm() << OUStringToOString( rName, m_rExport.eCurrentEncoding ).getStr() << '}';
+ // write the font name hex-encoded, but without Unicode - Word at least
+ // cannot read *both* Unicode and fallback as written by OutString
+ m_rExport.Strm() <<
+ msfilter::rtfutil::OutString(rName, m_rExport.eCurrentEncoding, false).getStr()
+ << '}';
}
/// Font charset.
@@ -3302,6 +3315,7 @@ void RtfAttributeOutput::FontCharset( sal_uInt8 nCharSet ) const
m_rExport.Strm() << OOO_STRING_SVTOOLS_RTF_FCHARSET;
m_rExport.OutULong( nCharSet );
m_rExport.Strm() << ' ';
+ m_rExport.eCurrentEncoding =rtl_getTextEncodingFromWindowsCharset(nCharSet);
}
/// Font family.
diff --git a/sw/source/filter/ww8/writerwordglue.cxx b/sw/source/filter/ww8/writerwordglue.cxx
index bafa0f0c23eb..f4141e59b3c7 100644
--- a/sw/source/filter/ww8/writerwordglue.cxx
+++ b/sw/source/filter/ww8/writerwordglue.cxx
@@ -712,6 +712,55 @@ namespace sw
return nRet;
}
+ /** Check whether a string can be converted to a legacy encoding
+     without loss.
+
+     @param rString
+     the UTF-16 string to probe (here: a font name)
+
+     @param eEncoding
+     the target text encoding
+
+     @return
+     the result of OUString::convertToString(), requested to fail on
+     characters that are undefined in eEncoding and on invalid input
+ */
+ static bool
+ CanEncode(OUString const& rString, rtl_TextEncoding const eEncoding)
+ {
+     // only the success/failure result matters, the bytes are discarded
+     rtl::OString tmp;
+     // convertToString() converts Unicode *to* text, so it takes the
+     // RTL_UNICODETOTEXT_FLAGS_* family; the RTL_TEXTTOUNICODE_FLAGS_*
+     // constants belong to the opposite direction and do not reliably
+     // request "fail on error" here
+     return rString.convertToString(&tmp, eEncoding,
+             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR |
+             RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR);
+ }
+
+ /** Determine the Windows charset to write for a font in RTF export.
+
+     Non-Unicode, known encodings map straight through
+     rtl_getBestWindowsCharsetFromTextEncoding(). For unknown or Unicode
+     encodings a legacy charset is picked that can at least represent the
+     font's name and alternate name; if there is none, 0x01 is returned.
+ */
+ sal_uInt8 rtl_TextEncodingToWinCharsetRTF(
+         OUString const& rFontName, OUString const& rAltName,
+         rtl_TextEncoding eTextEncoding)
+ {
+     // candidate legacy charsets, probed in exactly this order
+     static struct { rtl_TextEncoding enc; sal_uInt8 charset; }
+         const s_aCandidates [] = {
+             { RTL_TEXTENCODING_MS_932, 0x80 }, // Shift-JIS
+             { RTL_TEXTENCODING_MS_936, 0x86 }, // GB-2312
+             { RTL_TEXTENCODING_MS_950, 0x88 }, // Big5
+             { RTL_TEXTENCODING_MS_949, 0x81 }, // EUC-KR
+         };
+
+     sal_uInt8 const nBestCharset =
+         rtl_getBestWindowsCharsetFromTextEncoding(eTextEncoding);
+
+     bool const bUnicodeOrUnknown =
+            eTextEncoding == RTL_TEXTENCODING_DONTKNOW
+         || eTextEncoding == RTL_TEXTENCODING_UCS2
+         || eTextEncoding == RTL_TEXTENCODING_UTF7
+         || eTextEncoding == RTL_TEXTENCODING_UTF8
+         || eTextEncoding == RTL_TEXTENCODING_JAVA_UTF8;
+     if (!bUnicodeOrUnknown)
+     {
+         // a concrete legacy encoding: the direct mapping is good enough
+         return nBestCharset;
+     }
+
+     // otherwise settle for the first charset able to encode both names
+     size_t nIdx = 0;
+     while (nIdx < SAL_N_ELEMENTS(s_aCandidates))
+     {
+         rtl_TextEncoding const eCandidate = s_aCandidates[nIdx].enc;
+         bool const bNameFits = CanEncode(rFontName, eCandidate);
+         if (bNameFits && CanEncode(rAltName, eCandidate))
+         {
+             return s_aCandidates[nIdx].charset;
+         }
+         ++nIdx;
+     }
+
+     SAL_INFO("sw.rtf", "no fallback charset found for font: "
+             << rFontName << " " << rAltName);
+     return 0x01; // nothing fits: "default", whatever that is
+ }
+
long DateTime2DTTM( const DateTime& rDT )
{
/*
diff --git a/sw/source/filter/ww8/wrtw8sty.cxx b/sw/source/filter/ww8/wrtw8sty.cxx
index 52a050a6b3cb..2675cc56c385 100644
--- a/sw/source/filter/ww8/wrtw8sty.cxx
+++ b/sw/source/filter/ww8/wrtw8sty.cxx
@@ -857,7 +857,8 @@ void wwFont::WriteRtf( const RtfAttributeOutput* rAttrOutput ) const
{
rAttrOutput->FontFamilyType( meFamily, *this );
rAttrOutput->FontPitchType( mePitch );
- rAttrOutput->FontCharset( rtl_getBestWindowsCharsetFromTextEncoding( meChrSet ) );
+ rAttrOutput->FontCharset(
+ sw::ms::rtl_TextEncodingToWinCharsetRTF(msFamilyNm, msAltNm, meChrSet));
rAttrOutput->StartFont( msFamilyNm );
if ( mbAlt )
rAttrOutput->FontAlternateName( msAltNm );