diff options
author | Caolán McNamara <caolanm@redhat.com> | 2014-08-27 15:03:45 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2014-08-27 15:05:49 +0100 |
commit | 47b84f7e5143f445a087fc9ccc4fb29bbd88ff64 (patch) | |
tree | 0467340bd5406f52d6ec1d28800c8a61a585b5c4 | |
parent | 4143d7bc7078fb367130e092a354b20da57585cc (diff) |
Resolves: fdo#82904 non-Japanese ww95 documents claiming ms932 encoding
Change-Id: I62f8d5c3cac71f83f5cdde114f66e8554a780538
-rw-r--r-- | sw/source/filter/ww8/ww8par.cxx | 45 |
1 files changed, 44 insertions, 1 deletions
diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx index 4fc41eb40f4b..54cfbcc3a7fd 100644 --- a/sw/source/filter/ww8/ww8par.cxx +++ b/sw/source/filter/ww8/ww8par.cxx @@ -3037,8 +3037,51 @@ bool SwWW8ImplReader::ReadPlainChars(WW8_CP& rPos, sal_Int32 nEnd, sal_Int32 nCp // the correct FilePos has already been reached. const sal_Int32 nStrLen = std::min(nValidStrLen, SAL_MAX_INT32-1); - const rtl_TextEncoding eSrcCharSet = bVer67 ? GetCurrentCharSet() : + rtl_TextEncoding eSrcCharSet = bVer67 ? GetCurrentCharSet() : RTL_TEXTENCODING_MS_1252; + if (bVer67 && eSrcCharSet == RTL_TEXTENCODING_MS_932) + { + /* + fdo#82904 + + Older documents exported as Word 95 that use Unicode-aware fonts will + have the charset of those fonts set to RTL_TEXTENCODING_MS_932 on + export, as the result of converting from RTL_TEXTENCODING_UNICODE. This is + a serious pain. + + We will try to use a fallback encoding if the conversion from + RTL_TEXTENCODING_MS_932 fails, but you can get unlucky and get a document + which isn't really in RTL_TEXTENCODING_MS_932 but has parts that form + valid RTL_TEXTENCODING_MS_932 by chance :-( + + We're not the only ones who struggle with this. Here's the help from + MS Office 2003 on the topic: + + << + Earlier versions of Microsoft Word were sometimes used in conjunction with + third-party language-processing add-in programs designed to support Chinese or + Korean on English versions of Microsoft Windows. Use of these add-ins sometimes + results in incorrect text display in more recent versions of Word. + + However, you can set options to convert these documents so that text is + displayed correctly. On the Tools menu, click Options, and then click the + General tab. In the English Word 6.0/95 documents list, select Contain Asian + text (to have Word interpret the text as Asian code page data, regardless of + its font) or Automatically detect Asian text (to have Word attempt to determine + which parts of the text are meant to be Asian). 
+ >> + + What we can try here is to ignore an RTL_TEXTENCODING_MS_932 codepage if + the CJK language attribute is set and is not Japanese + */ + + const SfxPoolItem * pItem = GetFmtAttr(RES_CHRATR_CJK_LANGUAGE); + if (pItem != NULL && LANGUAGE_JAPANESE != static_cast<const SvxLanguageItem *>(pItem)->GetLanguage()) + { + SAL_WARN("sw.ww8", "discarding word95 RTL_TEXTENCODING_MS_932 encoding"); + eSrcCharSet = GetCharSetFromLanguage(); + } + } const rtl_TextEncoding eSrcCJKCharSet = bVer67 ? GetCurrentCJKCharSet() : RTL_TEXTENCODING_MS_1252; |