summaryrefslogtreecommitdiff
path: root/writerfilter/source/rtftok/rtfdocumentimpl.cxx
diff options
context:
space:
mode:
authorVasily Melenchuk <vasily.melenchuk@cib.de>2022-04-07 20:59:08 +0300
committerMiklos Vajna <vmiklos@collabora.com>2022-04-08 11:22:54 +0200
commit844be7358f1eec00094a55fa1fb4fadadb8cd1bf (patch)
treea0b5f6544717cc16b0c5f452a504a2d7e7070f1e /writerfilter/source/rtftok/rtfdocumentimpl.cxx
parent3a88b513fd90f4793b6de7a7412fa33369542f40 (diff)
tdf#95706: RTF import: tolerant font table parsing
While a font name in the font table should end with a semicolon ({\fonttbl{\f42 Arial;}}), this is not always the case, and MS Word tolerates it: it is still able to parse such a table correctly. It seems LO should not require strict spec conformance either. So the approach to font parsing is changed: instead of inserting the font on the semicolon, it is inserted on the next \fN or at the end of the destination. All text collected up to that point is the font name. Change-Id: I6b41951217442a71fd2ebbfc58a3fc79f6f913db Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132686 Tested-by: Jenkins Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Diffstat (limited to 'writerfilter/source/rtftok/rtfdocumentimpl.cxx')
-rw-r--r--writerfilter/source/rtftok/rtfdocumentimpl.cxx162
1 files changed, 88 insertions, 74 deletions
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 5a19ccebb20a..47349ac8aaba 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -1332,6 +1332,74 @@ void RTFDocumentImpl::singleChar(sal_uInt8 nValue, bool bRunProps)
}
}
+void RTFDocumentImpl::handleFontTableEntry()
+{
+ OUString aName = m_aStates.top().getCurrentDestinationText()->makeStringAndClear();
+
+ if (aName.isEmpty())
+ return; // no text was collected, so there is no font name to register
+
+ if (aName.endsWith(";"))
+ {
+ aName = aName.copy(0, aName.getLength() - 1); // drop the optional terminating semicolon
+ }
+
+ // Old documents may carry no encoding information in the font info, but
+ // the font name can end in a suffix instead: "Arial CE" is not a special
+ // font, it is ordinary Arial used with the cp1250 encoding.
+ // Moreover, these suffixes have priority over \cpgN and \fcharsetN
+ // in MS Word.
+ OUString aFontSuffix;
+ OUString aNameNoSuffix(aName);
+ sal_Int32 nLastSpace = aName.lastIndexOf(' ');
+ if (nLastSpace >= 0)
+ {
+ aFontSuffix = aName.copy(nLastSpace + 1);
+ aNameNoSuffix = aName.copy(0, nLastSpace);
+ sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW;
+ for (int i = 0; aRTFFontNameSuffixes[i].codepage != RTL_TEXTENCODING_DONTKNOW; i++)
+ {
+ if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix))
+ {
+ nEncoding = aRTFFontNameSuffixes[i].codepage;
+ break;
+ }
+ }
+ if (nEncoding > RTL_TEXTENCODING_DONTKNOW)
+ {
+ m_nCurrentEncoding = nEncoding;
+ m_aStates.top().setCurrentEncoding(m_nCurrentEncoding);
+ }
+ else
+ {
+ // Unknown suffix: it is apparently just a part of the font name, so restore it
+ aNameNoSuffix = aName;
+ }
+ }
+
+ m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix;
+ if (m_nCurrentEncoding >= 0)
+ {
+ m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding;
+ m_nCurrentEncoding = -1; // stored above; a negative value fails the check above
+ }
+ m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name,
+ new RTFValue(aNameNoSuffix));
+
+ writerfilter::Reference<Properties>::Pointer_t const pProp(new RTFReferenceProperties(
+ m_aStates.top().getTableAttributes(), m_aStates.top().getTableSprms()));
+
+ // See fdo#47347: initial invalid font entry properties are inserted first,
+ // so when we attempt to insert the correct ones there is already an entry
+ // in the map for this index; overwrite that entry instead of inserting.
+ auto lb = m_aFontTableEntries.lower_bound(m_nCurrentFontIndex);
+ if (lb != m_aFontTableEntries.end()
+ && !(m_aFontTableEntries.key_comp()(m_nCurrentFontIndex, lb->first)))
+ lb->second = pProp;
+ else
+ m_aFontTableEntries.insert(lb, std::make_pair(m_nCurrentFontIndex, pProp));
+}
+
void RTFDocumentImpl::text(OUString& rString)
{
if (rString.getLength() == 1 && m_aStates.top().getDestination() != Destination::DOCCOMM)
@@ -1345,10 +1413,7 @@ void RTFDocumentImpl::text(OUString& rString)
bool bRet = true;
switch (m_aStates.top().getDestination())
{
- // Note: in fonttbl there may or may not be groups; in stylesheet
- // and revtbl groups are mandatory
- case Destination::FONTTABLE:
- case Destination::FONTENTRY:
+ // Note: in stylesheet and revtbl groups are mandatory
case Destination::STYLEENTRY:
case Destination::LISTNAME:
case Destination::REVISIONENTRY:
@@ -1368,68 +1433,6 @@ void RTFDocumentImpl::text(OUString& rString)
= m_aStates.top().getCurrentDestinationText()->makeStringAndClear();
switch (m_aStates.top().getDestination())
{
- case Destination::FONTTABLE:
- case Destination::FONTENTRY:
- {
- // Old documents can contain no encoding information in fontinfo,
- // but there can be font name suffixes: Arial CE is not a special
- // font, it is ordinal Arial, but with used cp 1250 encoding.
- // Moreover these suffixes have priority over \cpgN and \fcharsetN
- // in MS Word.
- OUString aFontSuffix;
- OUString aNameNoSuffix(aName);
- sal_Int32 nLastSpace = aName.lastIndexOf(' ');
- if (nLastSpace >= 0)
- {
- aFontSuffix = aName.copy(nLastSpace + 1);
- aNameNoSuffix = aName.copy(0, nLastSpace);
- sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW;
- for (int i = 0;
- aRTFFontNameSuffixes[i].codepage != RTL_TEXTENCODING_DONTKNOW; i++)
- {
- if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix))
- {
- nEncoding = aRTFFontNameSuffixes[i].codepage;
- break;
- }
- }
- if (nEncoding > RTL_TEXTENCODING_DONTKNOW)
- {
- m_nCurrentEncoding = nEncoding;
- m_aStates.top().setCurrentEncoding(m_nCurrentEncoding);
- }
- else
- {
- // Unknown suffix: looks like it is just a part of font name, restore it
- aNameNoSuffix = aName;
- }
- }
-
- m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix;
- if (m_nCurrentEncoding >= 0)
- {
- m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding;
- m_nCurrentEncoding = -1;
- }
- m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name,
- new RTFValue(aNameNoSuffix));
-
- writerfilter::Reference<Properties>::Pointer_t const pProp(
- new RTFReferenceProperties(m_aStates.top().getTableAttributes(),
- m_aStates.top().getTableSprms()));
-
- //See fdo#47347 initial invalid font entry properties are inserted first,
- //so when we attempt to insert the correct ones, there's already an
- //entry in the map for them, so the new ones aren't inserted.
- auto lb = m_aFontTableEntries.lower_bound(m_nCurrentFontIndex);
- if (lb != m_aFontTableEntries.end()
- && !(m_aFontTableEntries.key_comp()(m_nCurrentFontIndex, lb->first)))
- lb->second = pProp;
- else
- m_aFontTableEntries.insert(lb,
- std::make_pair(m_nCurrentFontIndex, pProp));
- }
- break;
case Destination::STYLEENTRY:
{
RTFValue::Pointer_t pType
@@ -1467,6 +1470,8 @@ void RTFDocumentImpl::text(OUString& rString)
}
}
break;
+ case Destination::FONTTABLE:
+ case Destination::FONTENTRY:
case Destination::LEVELTEXT:
case Destination::SHAPEPROPERTYNAME:
case Destination::SHAPEPROPERTYVALUE:
@@ -2216,17 +2221,26 @@ RTFError RTFDocumentImpl::beforePopState(RTFParserState& rState)
{
switch (rState.getDestination())
{
+ //Note: in fonttbl there may or may not be groups, so process it as no groups
case Destination::FONTTABLE:
+ case Destination::FONTENTRY:
{
- writerfilter::Reference<Table>::Pointer_t const pTable(
- new RTFReferenceTable(m_aFontTableEntries));
- Mapper().table(NS_ooxml::LN_FONTTABLE, pTable);
- if (m_nDefaultFontIndex >= 0)
+ // Some text unhandled? Seems it is last font name
+ if (m_aStates.top().getCurrentDestinationText()->getLength())
+ handleFontTableEntry();
+
+ if (rState.getDestination() == Destination::FONTTABLE)
{
- auto pValue = new RTFValue(m_aFontNames[getFontIndex(m_nDefaultFontIndex)]);
- putNestedAttribute(m_aDefaultState.getCharacterSprms(),
- NS_ooxml::LN_EG_RPrBase_rFonts, NS_ooxml::LN_CT_Fonts_ascii,
- pValue);
+ writerfilter::Reference<Table>::Pointer_t const pTable(
+ new RTFReferenceTable(m_aFontTableEntries));
+ Mapper().table(NS_ooxml::LN_FONTTABLE, pTable);
+ if (m_nDefaultFontIndex >= 0)
+ {
+ auto pValue = new RTFValue(m_aFontNames[getFontIndex(m_nDefaultFontIndex)]);
+ putNestedAttribute(m_aDefaultState.getCharacterSprms(),
+ NS_ooxml::LN_EG_RPrBase_rFonts, NS_ooxml::LN_CT_Fonts_ascii,
+ pValue);
+ }
}
}
break;