diff options
author | Caolán McNamara <caolanm@redhat.com> | 2011-02-24 13:28:15 +0000 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2011-02-24 13:28:15 +0000 |
commit | 1ef7ca4438b74f9f7b24451df90b860418d70bb1 (patch) | |
tree | e01d572392a752b257108c0ae545baa1ee40217e | |
parent | 83e28f13c3ca2b7b11db1f9c3b3eb6725ad106ca (diff) |
Resolves: fdo#34319 on import, take idcthint into account
i.e. force on the font that Word would use. We really need an idcthint
implementation of our own to be able to do this completely correctly.
-rw-r--r-- | sw/source/filter/ww8/ww8par.cxx | 194 | ||||
-rw-r--r-- | sw/source/filter/ww8/ww8par.hxx | 4 |
2 files changed, 158 insertions, 40 deletions
diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx index 9a6283ef0026..5b49fb1a757c 100644 --- a/sw/source/filter/ww8/ww8par.cxx +++ b/sw/source/filter/ww8/ww8par.cxx @@ -56,6 +56,7 @@ #include <editeng/langitem.hxx> #include <editeng/opaqitem.hxx> #include <editeng/charhiddenitem.hxx> +#include <editeng/fontitem.hxx> #include <filter/msfilter/svxmsbas.hxx> #include <svx/unoapi.hxx> #include <svx/svdoole2.hxx> @@ -135,6 +136,8 @@ #include <osl/file.hxx> #include <com/sun/star/document/XDocumentInfoSupplier.hpp> +#include <breakit.hxx> + #ifdef DEBUG #include <iostream> #include <dbgoutsw.hxx> @@ -154,6 +157,8 @@ using namespace nsHdFtFlags; #include <com/sun/star/document/XEventsSupplier.hpp> #include <com/sun/star/container/XNameReplace.hpp> +#include <com/sun/star/i18n/XBreakIterator.hpp> +#include <com/sun/star/i18n/ScriptType.hdl> #include <com/sun/star/frame/XModel.hpp> #include <filter/msfilter/msvbahelper.hxx> #include <unotools/pathoptions.hxx> @@ -2638,29 +2643,144 @@ bool SwWW8ImplReader::ReadPlainChars(WW8_CP& rPos, long nEnd, long nCpOfs) return nL2 >= nLen; } -//TODO: In writer we categorize text into CJK, CTL and "Western" for everything -//else. Microsoft Word basically categorizes text into East Asian, Non-East -//Asian and ASCII, with some shared characters and some properties to -//to hint as to which way to bias those shared characters. 
+#define MSASCII SAL_MAX_INT16 + +namespace +{ + //We want to force weak chars inside 0x0020 to 0x007F to LATIN + sal_Int16 lcl_getScriptType( + const uno::Reference<i18n::XBreakIterator>& rBI, + const rtl::OUString &rString, sal_Int32 nPos) + { + sal_Int16 nScript = rBI->getScriptType(rString, nPos); + if (nScript == i18n::ScriptType::WEAK && rString[nPos] >= 0x0020 && rString[nPos] <= 0x007F) + nScript = MSASCII; + return nScript; + } + + //We want to know about WEAK segments, so endOfScript isn't + //useful, and see lcl_getScriptType anyway + sal_Int32 lcl_endOfScript( + const uno::Reference<i18n::XBreakIterator>& rBI, + const rtl::OUString &rString, sal_Int32 nPos, sal_Int16 nScript) + { + while (nPos < rString.getLength()) + { + sal_Int16 nNewScript = lcl_getScriptType(rBI, rString, nPos); + if (nScript != nNewScript) + break; + ++nPos; + } + return nPos; + } +} + +//In writer we categorize text into CJK, CTL and "Western" for everything else. +//Microsoft Word basically categorizes text into East Asian, Complex, ASCII, +//NonEastAsian/HighAnsi, with some shared characters and some properties to to +//hint as to which way to bias those shared characters. // -//Here we must find out "what would word do" to see what font/language -//word would assign to characters based on the unicode range they fall -//into, taking into account the idctHint property if it exists. +//That's four categories, we however have three categories. Given that problem +//here we would ideally find out "what would word do" to see what font/language +//word would assign to characters based on the unicode range they fall into and +//hack the word one onto the range we use. However it's unclear what word's +//categorization is. So we don't do that here yet. 
// -//Where this differs from the default category that writer would assign it to -//we're then forced (because we don't have an equivalent hint) to mirror the -//properties of the source MSWord category into the properties of the dest -//Writer category for that range of text in order to get the right results. -bool SwWW8ImplReader::emulateMSWordAddTextToParagraph(const String& rAddString) +//Additional to the categorization, when word encounters weak text for ambigious +//chars it uses idcthint to indicate which way to bias. We don't have a idcthint +//feature in writer. +// +//So what we currently do here then is to split our text into non-weak/weak +//sections and uses word's idcthint to determine what font it would use and +//force that on for the segment. Following what we *do* know about word's +//categorization, we know that the range 0x0020 and 0x007F is sprmCRgFtc0 in +//word, something we map to LATIN, so we consider all weaks chars in that range +//to auto-bias to LATIN. +// +//See https://bugs.freedesktop.org/show_bug.cgi?id=34319 for an example +void SwWW8ImplReader::emulateMSWordAddTextToParagraph(const rtl::OUString& rAddString) { - return simpleAddTextToParagraph(rAddString); + if (!rAddString.getLength()) + return; + + uno::Reference<i18n::XBreakIterator> xBI(pBreakIt->GetBreakIter()); + if (!xBI.is()) + { + simpleAddTextToParagraph(rAddString); + return; + } + + sal_Int16 nScript = lcl_getScriptType(xBI, rAddString, 0); + sal_Int32 nLen = rAddString.getLength(); + + sal_Int32 nPos = 0; + while (nPos < nLen) + { + sal_Int32 nEnd = lcl_endOfScript(xBI, rAddString, nPos, nScript); + if (nEnd < 0) + break; + + rtl::OUString sChunk(rAddString.copy(nPos, nEnd-nPos)); + const sal_uInt16 aIds[] = {RES_CHRATR_FONT, RES_CHRATR_CJK_FONT, RES_CHRATR_CTL_FONT}; + bool aForced[] = {false, false, false}; + + int nLclIdctHint = 0xFF; + if (nScript == i18n::ScriptType::WEAK) + nLclIdctHint = nIdctHint; + else if (nScript == MSASCII) //Force weak chars in ascii 
range to use LATIN font + nLclIdctHint = 0; + + if (nLclIdctHint != 0xFF) + { + sal_uInt16 nForceFromFontId = 0; + switch (nLclIdctHint) + { + case 0: + nForceFromFontId = RES_CHRATR_FONT; + break; + case 1: + nForceFromFontId = RES_CHRATR_CJK_FONT; + break; + case 2: + nForceFromFontId = RES_CHRATR_CTL_FONT; + break; + default: + break; + } + + const SvxFontItem *pSourceFont = (const SvxFontItem*)GetFmtAttr(nForceFromFontId); + + for (size_t i = 0; i < SAL_N_ELEMENTS(aIds); ++i) + { + const SvxFontItem *pDestFont = (const SvxFontItem*)GetFmtAttr(aIds[i]); + aForced[i] = aIds[i] != nForceFromFontId && *pSourceFont != *pDestFont; + if (aForced[i]) + { + SvxFontItem aForceFont(*pSourceFont); + aForceFont.SetWhich(aIds[i]); + pCtrlStck->NewAttr(*pPaM->GetPoint(), aForceFont); + } + } + } + + simpleAddTextToParagraph(sChunk); + + for (size_t i = 0; i < SAL_N_ELEMENTS(aIds); ++i) + { + if (aForced[i]) + pCtrlStck->SetAttr(*pPaM->GetPoint(), aIds[i]); + } + + nPos = nEnd; + nScript = lcl_getScriptType(xBI, rAddString, nPos); + } + } -bool SwWW8ImplReader::simpleAddTextToParagraph(const String& rAddString) +void SwWW8ImplReader::simpleAddTextToParagraph(const String& rAddString) { - const SwTxtNode* pNd = pPaM->GetCntntNode()->GetTxtNode(); - if (rAddString.Len()) - { + if (!rAddString.Len()) + return; #ifdef DEBUG { @@ -2670,35 +2790,33 @@ bool SwWW8ImplReader::simpleAddTextToParagraph(const String& rAddString) << ::std::endl; } #endif + const SwTxtNode* pNd = pPaM->GetCntntNode()->GetTxtNode(); - if ((pNd->GetTxt().Len() + rAddString.Len()) < STRING_MAXLEN-1) + if ((pNd->GetTxt().Len() + rAddString.Len()) < STRING_MAXLEN-1) + { + rDoc.InsertString(*pPaM, rAddString); + } + else + { + + if (pNd->GetTxt().Len()< STRING_MAXLEN -1) { - rDoc.InsertString(*pPaM, rAddString); + String sTempStr (rAddString,0, + STRING_MAXLEN - pNd->GetTxt().Len() -1); + rDoc.InsertString(*pPaM, sTempStr); + sTempStr = rAddString.Copy(sTempStr.Len(), + rAddString.Len() - sTempStr.Len()); + 
AppendTxtNode(*pPaM->GetPoint()); + rDoc.InsertString(*pPaM, sTempStr); } else { - - if (pNd->GetTxt().Len()< STRING_MAXLEN -1) - { - String sTempStr (rAddString,0, - STRING_MAXLEN - pNd->GetTxt().Len() -1); - rDoc.InsertString(*pPaM, sTempStr); - sTempStr = rAddString.Copy(sTempStr.Len(), - rAddString.Len() - sTempStr.Len()); - AppendTxtNode(*pPaM->GetPoint()); - rDoc.InsertString(*pPaM, sTempStr); - } - else - { - AppendTxtNode(*pPaM->GetPoint()); - rDoc.InsertString(*pPaM, rAddString); - } + AppendTxtNode(*pPaM->GetPoint()); + rDoc.InsertString(*pPaM, rAddString); } - - bReadTable = false; } - return true; + bReadTable = false; } // Returnwert: true for para end diff --git a/sw/source/filter/ww8/ww8par.hxx b/sw/source/filter/ww8/ww8par.hxx index d1b0aea66076..2e6587a60000 100644 --- a/sw/source/filter/ww8/ww8par.hxx +++ b/sw/source/filter/ww8/ww8par.hxx @@ -1117,8 +1117,8 @@ private: pReffingStck = 0; } void DeleteAnchorStk() { DeleteStk( pAnchorStck ); pAnchorStck = 0; } - bool emulateMSWordAddTextToParagraph(const String& sAddString); - bool simpleAddTextToParagraph(const String& sAddString); + void emulateMSWordAddTextToParagraph(const rtl::OUString& rAddString); + void simpleAddTextToParagraph(const String& rAddString); bool HandlePageBreakChar(); bool ReadChar(long nPosCp, long nCpOfs); bool ReadPlainChars(WW8_CP& rPos, long nEnd, long nCpOfs); |