summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCaolán McNamara <caolanm@redhat.com>2014-08-27 15:03:45 +0100
committerCaolán McNamara <caolanm@redhat.com>2014-08-27 15:05:49 +0100
commit47b84f7e5143f445a087fc9ccc4fb29bbd88ff64 (patch)
tree0467340bd5406f52d6ec1d28800c8a61a585b5c4
parent4143d7bc7078fb367130e092a354b20da57585cc (diff)
Resolves: fdo#82904 non-Japanese ww95 documents claiming ms932 encoding
Change-Id: I62f8d5c3cac71f83f5cdde114f66e8554a780538
-rw-r--r--sw/source/filter/ww8/ww8par.cxx45
1 files changed, 44 insertions, 1 deletions
diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx
index 4fc41eb40f4b..54cfbcc3a7fd 100644
--- a/sw/source/filter/ww8/ww8par.cxx
+++ b/sw/source/filter/ww8/ww8par.cxx
@@ -3037,8 +3037,51 @@ bool SwWW8ImplReader::ReadPlainChars(WW8_CP& rPos, sal_Int32 nEnd, sal_Int32 nCp
// the correct FilePos has already been reached.
const sal_Int32 nStrLen = std::min(nValidStrLen, SAL_MAX_INT32-1);
- const rtl_TextEncoding eSrcCharSet = bVer67 ? GetCurrentCharSet() :
+ rtl_TextEncoding eSrcCharSet = bVer67 ? GetCurrentCharSet() :
RTL_TEXTENCODING_MS_1252;
+ if (bVer67 && eSrcCharSet == RTL_TEXTENCODING_MS_932)
+ {
+ /*
+ fdo#82904
+
+ Older documents exported as Word 95 that use Unicode-aware fonts will
+ have the charset of those fonts set to RTL_TEXTENCODING_MS_932 on
+ export, as that is what RTL_TEXTENCODING_UNICODE is converted to. This is
+ a serious pain.
+
+ We will try and use a fallback encoding if the conversion from
+ RTL_TEXTENCODING_MS_932 fails, but you can get unlucky and get a document
+ which isn't really in RTL_TEXTENCODING_MS_932 but parts of it form
+ valid RTL_TEXTENCODING_MS_932 by chance :-(
+
+ We're not the only ones that struggle with this: Here's the help from
+ MSOffice 2003 on the topic:
+
+ <<
+ Earlier versions of Microsoft Word were sometimes used in conjunction with
+ third-party language-processing add-in programs designed to support Chinese or
+ Korean on English versions of Microsoft Windows. Use of these add-ins sometimes
+ results in incorrect text display in more recent versions of Word.
+
+ However, you can set options to convert these documents so that text is
+ displayed correctly. On the Tools menu, click Options, and then click the
+ General tab. In the English Word 6.0/95 documents list, select Contain Asian
+ text (to have Word interpret the text as Asian code page data, regardless of
+ its font) or Automatically detect Asian text (to have Word attempt to determine
+ which parts of the text are meant to be Asian).
+ >>
+
+ What we can try here is to ignore an RTL_TEXTENCODING_MS_932 codepage if
+ the language is not Japanese.
+ */
+
+ const SfxPoolItem * pItem = GetFmtAttr(RES_CHRATR_CJK_LANGUAGE);
+ if (pItem != NULL && LANGUAGE_JAPANESE != static_cast<const SvxLanguageItem *>(pItem)->GetLanguage())
+ {
+ SAL_WARN("sw.ww8", "discarding word95 RTL_TEXTENCODING_MS_932 encoding");
+ eSrcCharSet = GetCharSetFromLanguage();
+ }
+ }
const rtl_TextEncoding eSrcCJKCharSet = bVer67 ? GetCurrentCJKCharSet() :
RTL_TEXTENCODING_MS_1252;