summary | refs | log | tree | commit | diff
path: root/writerfilter
diff options
context:
space:
mode:
author: Mike Kaganski <mike.kaganski@collabora.com> 2019-12-13 09:36:39 +0300
committer: Mike Kaganski <mike.kaganski@collabora.com> 2019-12-16 17:23:57 +0100
commit: 5cdb14345842c07eb1a466897753da910e9488f8 (patch)
tree: 8060407f05d37aa856096c94ca808330c173135e /writerfilter
parent: d396819ce8c8561afcf69c7e0dbfb6108439b4c3 (diff)
tdf#129353, tdf#129402: fix node creation on index import
The ToC, bibliography, and index section import code is changed to closely follow what Word does, to make sure that pre-rendered entries don't get imported as standalone paragraphs outside of the index sections, and that the paragraph count is accurate (no missing or added paragraphs, as far as possible). In Word, an index may start and end in the middle of a paragraph: <w:p> <w:r> <w:t>Some text before index</w:t> </w:r> <w:r> <w:fldChar w:fldCharType="begin"/> </w:r> <w:r> <w:instrText> TOC ...</w:instrText> </w:r> <w:r> <w:fldChar w:fldCharType="separate"/> </w:r> <w:r> <w:t>First pre-rendered index entry</w:t> </w:r> </w:p> ... <w:p> <w:r> <w:t>Last pre-rendered index entry</w:t> </w:r> <w:r> <w:fldChar w:fldCharType="end"/> </w:r> <w:r> <w:t>Some text after index</w:t> </w:r> </w:p> However, normally it looks like either no runs preceding the index, or no runs of pre-rendered contents, will be present. When no Std elements are used, the typical situation is that there's a normal paragraph (possibly with some user text), which ends with the index start marker, without any pre-rendered contents in the same paragraph; and all pre-rendered contents go in the following paragraphs. Such an index normally ends with the index end marker in the *first* run of a paragraph, which then might have normal text runs. When Stds are used, then no leading/trailing out-of-index runs are usually present in the paragraphs with marks; and in this case, when the paragraphs with index marks don't contain pre-rendered entries, they are still treated as part of the index. In Writer, indexes are node sections (and so cannot be inline with other paragraph contents). When there was already some paragraph content before the start-of-index mark, the paragraph is assumed to end before the index; in this case, when the current <w:p> element ends, the importer decides whether a separate starting paragraph is needed or not, depending on whether there were any runs after the mark. 
When there were no text runs before the starting mark, the paragraph is treated as the leading paragraph of the index. This makes it possible to not miss empty paragraphs before the index, and to not have two paragraphs where there was one in Word. Only in cases when the user had manually typed text both inside and outside of the index in the same paragraph in Word would we have the paragraph split into two in Writer. For end marks, the behaviour depends on whether the mark is inside an Std. When inside, the ending paragraph starting with the index end mark is considered part of the index. In the out-of-Std case, it's considered a normal paragraph (and measures are taken to make sure it's not dropped even if empty, because sometimes such paragraphs don't have other content, and have section settings, which is usually treated by Writer as a "drop this paragraph" sign). A special problem is the multi-column index. It's wrapped into a continuous section by Word; and in Writer, we also wrap it into a section. It would possibly be useful to detect somehow if this section is part of the index definition, and in this case, drop the section and put its properties into Writer's index section. That would avoid an explicit section in the imported document. This is a TODO, for someone who figures out how to detect reliably whether the section belongs to the index definition. See the comment in DomainMapper_Impl::appendTextSectionAfter. By the way, the current export code is wrong, producing an index that is single-column in Word; this change doesn't touch that. Several existing tests, which used to test wrong results, needed to be fixed. Change-Id: I9597c8ab13f31ded9abcc24054d3478d3e3a3b40 Reviewed-on: https://gerrit.libreoffice.org/85089 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'writerfilter')
-rw-r--r-- writerfilter/source/dmapper/DomainMapper.cxx | 11
-rw-r--r-- writerfilter/source/dmapper/DomainMapper_Impl.cxx | 180
-rw-r--r-- writerfilter/source/dmapper/DomainMapper_Impl.hxx | 5
3 files changed, 125 insertions, 71 deletions
diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx
index 49d11cf3be87..9bbbedf64f9c 100644
--- a/writerfilter/source/dmapper/DomainMapper.cxx
+++ b/writerfilter/source/dmapper/DomainMapper.cxx
@@ -3418,11 +3418,12 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len)
// If the paragraph contains only the section properties and it has
// no runs, we should not create a paragraph for it in Writer, unless that would remove the whole section.
SectionPropertyMap* pSectionContext = m_pImpl->GetSectionContext();
- bool bRemove = !m_pImpl->GetParaChanged() && m_pImpl->GetParaSectpr()
- && !bSingleParagraphAfterRedline
- && !m_pImpl->GetIsDummyParaAddedForTableInSection()
- && !( pSectionContext && pSectionContext->GetBreakType() != -1 && pContext && pContext->isSet(PROP_BREAK_TYPE) )
- && !m_pImpl->GetIsPreviousParagraphFramed();
+ bool bRemove = (!m_pImpl->GetParaChanged() && m_pImpl->GetRemoveThisPara()) ||
+ (!m_pImpl->GetParaChanged() && m_pImpl->GetParaSectpr()
+ && !bSingleParagraphAfterRedline
+ && !m_pImpl->GetIsDummyParaAddedForTableInSection()
+ && !( pSectionContext && pSectionContext->GetBreakType() != -1 && pContext && pContext->isSet(PROP_BREAK_TYPE) )
+ && !m_pImpl->GetIsPreviousParagraphFramed());
const bool bNoNumbering = bRemove || (!m_pImpl->GetParaChanged() && m_pImpl->GetParaSectpr() && bSingleParagraph);
PropertyMapPtr xContext = bNoNumbering ? m_pImpl->GetTopContextOfType(CONTEXT_PARAGRAPH) : PropertyMapPtr();
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index 88b985bcfc0b..b938343bb7ef 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -41,6 +41,8 @@
#include <com/sun/star/text/ChapterFormat.hpp>
#include <com/sun/star/text/FilenameDisplayFormat.hpp>
#include <com/sun/star/text/SetVariableType.hpp>
+#include <com/sun/star/text/XDocumentIndex.hpp>
+#include <com/sun/star/text/XDocumentIndexesSupplier.hpp>
#include <com/sun/star/text/XFootnote.hpp>
#include <com/sun/star/text/XLineNumberingProperties.hpp>
#include <com/sun/star/style/XStyle.hpp>
@@ -96,6 +98,7 @@
#include <unotools/configmgr.hxx>
#include <unotools/mediadescriptor.hxx>
#include <tools/diagnose_ex.h>
+#include <tools/lineend.hxx>
#include <sal/log.hxx>
@@ -1800,6 +1803,7 @@ void DomainMapper_Impl::finishParagraph( const PropertyMapPtr& pPropertyMap, con
SetIsPreviousParagraphFramed(false);
m_bParaChanged = false;
+ m_bRemoveThisParagraph = false;
if( !IsInHeaderFooter() && !IsInShape() && (!pParaContext || !pParaContext->IsFrameMode()) )
{ // If the paragraph is in a frame, shape or header/footer, it's not a paragraph of the section itself.
SetIsFirstParagraphInSection(false);
@@ -1883,7 +1887,7 @@ void DomainMapper_Impl::appendTextPortion( const OUString& rString, const Proper
uno::Reference< text::XTextCursor > xTOCTextCursor = xTextAppend->getEnd()->getText( )->createTextCursor( );
assert(xTOCTextCursor.is());
xTOCTextCursor->gotoEnd(false);
- if (m_bStartIndex || m_bStartBibliography || m_bStartGenericField)
+ if (m_bStartGenericField)
xTOCTextCursor->goLeft(1, false);
xTextRange = xTextAppend->insertTextPortion(rString, aValues, xTOCTextCursor);
SAL_WARN_IF(!xTextRange.is(), "writerfilter.dmapper", "insertTextPortion failed");
@@ -2128,8 +2132,38 @@ uno::Reference< beans::XPropertySet > DomainMapper_Impl::appendTextSectionAfter(
xCursor->gotoEnd( true );
//the paragraph after this new section is already inserted
xCursor->goLeft(1, true);
+ css::uno::Reference<css::text::XTextRange> xTextRange(xCursor, css::uno::UNO_QUERY_THROW);
+
+ if (css::uno::Reference<css::text::XDocumentIndexesSupplier> xIndexSupplier{
+ GetTextDocument(), css::uno::UNO_QUERY })
+ {
+ css::uno::Reference<css::text::XTextRangeCompare> xCompare(
+ xTextAppend, css::uno::UNO_QUERY);
+ const auto xIndexAccess = xIndexSupplier->getDocumentIndexes();
+ for (sal_Int32 i = xIndexAccess->getCount(); i > 0; --i)
+ {
+ if (css::uno::Reference<css::text::XDocumentIndex> xIndex{
+ xIndexAccess->getByIndex(i - 1), css::uno::UNO_QUERY })
+ {
+ const auto xIndexTextRange = xIndex->getAnchor();
+ if (xCompare->compareRegionStarts(xTextRange, xIndexTextRange) == 0
+ && xCompare->compareRegionEnds(xTextRange, xIndexTextRange) == 0)
+ {
+ // The boundaries coincide with an index: trying to attach a section
+ // to the range will insert the section inside the index. goRight will
+ // extend the range outside of the index, so that created section will
+ // be around it. Alternatively we could return index section itself
+ // instead : xRet.set(xIndex, uno::UNO_QUERY) - to set its properties,
+ // like columns/fill.
+ xCursor->goRight(1, true);
+ break;
+ }
+ }
+ }
+ }
+
uno::Reference< text::XTextContent > xSection( m_xTextFactory->createInstance("com.sun.star.text.TextSection"), uno::UNO_QUERY_THROW );
- xSection->attach( uno::Reference< text::XTextRange >( xCursor, uno::UNO_QUERY_THROW) );
+ xSection->attach( xTextRange );
xRet.set(xSection, uno::UNO_QUERY );
}
catch(const uno::Exception&)
@@ -4125,10 +4159,12 @@ OUString DomainMapper_Impl::extractTocTitle()
else
xCursor->gotoEnd( true );
- //the paragraph after this new section is already inserted
- xCursor->goLeft(1, true);
+ // the paragraph after this new section might have been already inserted
+ OUString sResult = xCursor->getString();
+ if (sResult.endsWith(SAL_NEWLINE_STRING))
+ sResult = sResult.copy(0, sResult.getLength() - SAL_N_ELEMENTS(SAL_NEWLINE_STRING) + 1);
- return xCursor->getString();
+ return sResult;
}
catch(const uno::Exception&)
{
@@ -4137,12 +4173,48 @@ OUString DomainMapper_Impl::extractTocTitle()
return OUString();
}
+css::uno::Reference<css::beans::XPropertySet>
+DomainMapper_Impl::StartIndexSectionChecked(const OUString& sServiceName)
+{
+ if (m_bParaChanged)
+ {
+ finishParagraph(GetTopContextOfType(CONTEXT_PARAGRAPH), false); // resets m_bParaChanged
+ PopProperties(CONTEXT_PARAGRAPH);
+ PushProperties(CONTEXT_PARAGRAPH);
+ SetIsFirstRun(true);
+ // The first paragraph of the index that is continuation of just finished one needs to be
+ // removed when finished (unless more content will arrive, which will set m_bParaChanged)
+ m_bRemoveThisParagraph = true;
+ }
+ const auto& xTextAppend = GetTopTextAppend();
+ const auto xTextRange = xTextAppend->getEnd();
+ const auto xRet = createSectionForRange(xTextRange, xTextRange, sServiceName, false);
+ if (!m_aTextAppendStack.top().xInsertPosition)
+ {
+ try
+ {
+ m_bStartedTOC = true;
+ uno::Reference<text::XTextCursor> xTOCTextCursor
+ = xTextRange->getText()->createTextCursor();
+ assert(xTOCTextCursor.is());
+ xTOCTextCursor->gotoEnd(false);
+ mxTOCTextCursor = xTOCTextCursor;
+ m_aTextAppendStack.push(TextAppendContext(xTextAppend, xTOCTextCursor));
+ }
+ catch (const uno::Exception&)
+ {
+ TOOLS_WARN_EXCEPTION("writerfilter.dmapper",
+ "DomainMapper_Impl::StartIndexSectionChecked:");
+ }
+ }
+ return xRet;
+}
+
void DomainMapper_Impl::handleToc
(const FieldContextPtr& pContext,
const OUString & sTOCServiceName)
{
OUString sValue;
- m_bStartTOC = true;
if (IsInHeaderFooter())
m_bStartTOCHeaderFooter = true;
bool bTableOfFigures = false;
@@ -4259,34 +4331,21 @@ void DomainMapper_Impl::handleToc
if (m_xTextFactory.is() && ! m_aTextAppendStack.empty())
{
+ const auto& xTextAppend = GetTopTextAppend();
if (aTocTitle.isEmpty() || bTableOfFigures)
{
// reset marker of the TOC title
- m_xStdEntryStart = uno::Reference< text::XTextRange >();
-
- xTOC.set(
- m_xTextFactory->createInstance
- ( bTableOfFigures ?
- "com.sun.star.text.IllustrationsIndex"
- : sTOCServiceName),
- uno::UNO_QUERY_THROW);
-
- OUString const sMarker("Y");
- //insert index
- uno::Reference< text::XTextContent > xToInsert( xTOC, uno::UNO_QUERY );
- uno::Reference< text::XTextAppend > xTextAppend = m_aTextAppendStack.top().xTextAppend;
- if (xTextAppend.is())
- {
- uno::Reference< text::XTextCursor > xCrsr = xTextAppend->getText()->createTextCursor();
- uno::Reference< text::XText > xText = xTextAppend->getText();
- if(xCrsr.is() && xText.is())
- {
- xCrsr->gotoEnd(false);
- xText->insertString(xCrsr, sMarker, false);
- xText->insertTextContent(uno::Reference< text::XTextRange >( xCrsr, uno::UNO_QUERY_THROW ), xToInsert, false);
- xTOCMarkerCursor = xCrsr;
- }
- }
+ m_xStdEntryStart.clear();
+
+ // Create section before setting m_bStartTOC: finishing paragraph
+ // inside StartIndexSectionChecked could do the wrong thing otherwise
+ xTOC = StartIndexSectionChecked(bTableOfFigures ? "com.sun.star.text.IllustrationsIndex"
+ : sTOCServiceName);
+
+ const auto xTextCursor = xTextAppend->getText()->createTextCursor();
+ if (xTextCursor)
+ xTextCursor->gotoEnd(false);
+ xTOCMarkerCursor = xTextCursor;
}
else
{
@@ -4295,7 +4354,6 @@ void DomainMapper_Impl::handleToc
xTOC = createSectionForRange(m_xStdEntryStart, xTextRangeEndOfTocHeader, sTOCServiceName, false);
// init [xTOCMarkerCursor]
- uno::Reference< text::XTextAppend > xTextAppend = m_aTextAppendStack.top().xTextAppend;
uno::Reference< text::XText > xText = xTextAppend->getText();
uno::Reference< text::XTextCursor > xCrsr = xText->createTextCursor();
xTOCMarkerCursor = xCrsr;
@@ -4306,6 +4364,8 @@ void DomainMapper_Impl::handleToc
}
}
+ m_bStartTOC = true;
+
if (xTOC.is())
xTOC->setPropertyValue(getPropertyName( PROP_TITLE ), uno::makeAny(aTocTitle));
@@ -4443,14 +4503,12 @@ void DomainMapper_Impl::handleBibliography
(const FieldContextPtr& pContext,
const OUString & sTOCServiceName)
{
- uno::Reference< beans::XPropertySet > xTOC;
+ // Create section before setting m_bStartTOC and m_bStartBibliography: finishing paragraph
+ // inside StartIndexSectionChecked could do the wrong thing otherwise
+ const auto xTOC = StartIndexSectionChecked(sTOCServiceName);
m_bStartTOC = true;
m_bStartBibliography = true;
- if (m_xTextFactory.is())
- xTOC.set(
- m_xTextFactory->createInstance(
- sTOCServiceName),
- uno::UNO_QUERY_THROW);
+
if (xTOC.is())
xTOC->setPropertyValue(getPropertyName( PROP_TITLE ), uno::makeAny(OUString()));
@@ -4465,18 +4523,15 @@ void DomainMapper_Impl::handleIndex
(const FieldContextPtr& pContext,
const OUString & sTOCServiceName)
{
- uno::Reference< beans::XPropertySet > xTOC;
+ // Create section before setting m_bStartTOC and m_bStartIndex: finishing paragraph
+ // inside StartIndexSectionChecked could do the wrong thing otherwise
+ const auto xTOC = StartIndexSectionChecked(sTOCServiceName);
+
m_bStartTOC = true;
m_bStartIndex = true;
OUString sValue;
OUString sIndexEntryType = "I"; // Default value for field flag '\f' is 'I'.
-
- if (m_xTextFactory.is())
- xTOC.set(
- m_xTextFactory->createInstance(
- sTOCServiceName),
- uno::UNO_QUERY_THROW);
if (xTOC.is())
{
xTOC->setPropertyValue(getPropertyName( PROP_TITLE ), uno::makeAny(OUString()));
@@ -5628,33 +5683,26 @@ void DomainMapper_Impl::PopFieldContext()
uno::Reference< text::XTextContent > xToInsert( pContext->GetTOC(), uno::UNO_QUERY );
if( xToInsert.is() )
{
- if(xTOCMarkerCursor.is() || m_bStartIndex || m_bStartBibliography)
+ if (m_bStartedTOC || m_bStartIndex || m_bStartBibliography)
{
- if (m_bStartIndex || m_bStartBibliography)
- {
- if (mxTOCTextCursor.is())
- {
- mxTOCTextCursor->goLeft(1,true);
- mxTOCTextCursor->setString(OUString());
- }
- xTextAppend->finishParagraph( uno::Sequence< beans::PropertyValue >() );
- }
- else
+ // inside Std, last empty paragraph is also part of index
+ if (!m_bParaChanged && !m_xStdEntryStart)
{
- if (!m_xStdEntryStart.is())
- {
- xTOCMarkerCursor->goLeft(1,true);
- xTOCMarkerCursor->setString(OUString());
- xTOCMarkerCursor->goLeft(1,true);
- xTOCMarkerCursor->setString(OUString());
- }
+ // End of index is the first item on a new paragraph - this paragraph
+ // should not be part of index
+ auto xCursor
+ = xTextAppend->createTextCursorByRange(xTextAppend->getEnd());
+ xCursor->gotoEnd(false);
+ xCursor->goLeft(1, true);
+ // delete
+ xCursor->setString(OUString());
+ // But a new paragraph should be started after the index instead
+ xTextAppend->finishParagraph(css::beans::PropertyValues());
}
- }
- if (m_bStartedTOC || m_bStartIndex || m_bStartBibliography)
- {
m_bStartedTOC = false;
m_aTextAppendStack.pop();
m_bTextInserted = false;
+ m_bParaChanged = true; // the paragraph must stay anyway
}
m_bStartTOC = false;
m_bStartIndex = false;
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
index 53cf5e20a140..8780bc1d51fd 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
@@ -546,6 +546,8 @@ private:
bool m_bSdt;
bool m_bIsFirstRun;
bool m_bIsOutsideAParagraph;
+ /// This is a continuation of already finished paragraph - e.g., first in an index section
+ bool m_bRemoveThisParagraph = false;
css::uno::Reference< css::text::XTextCursor > xTOCMarkerCursor;
css::uno::Reference< css::text::XTextCursor > mxTOCTextCursor;
@@ -652,6 +654,7 @@ public:
/// Getter method for m_bSdt.
bool GetSdt() const { return m_bSdt;}
bool GetParaChanged() const { return m_bParaChanged;}
+ bool GetRemoveThisPara() const { return m_bRemoveThisParagraph; }
void deferBreak( BreakType deferredBreakType );
bool isBreakDeferred( BreakType deferredBreakType );
@@ -1047,6 +1050,8 @@ public:
private:
void PushPageHeaderFooter(bool bHeader, SectionPropertyMap::PageType eType);
+ // Start a new index section; if needed, finish current paragraph
+ css::uno::Reference<css::beans::XPropertySet> StartIndexSectionChecked(const OUString& sServiceName);
std::vector<css::uno::Reference< css::drawing::XShape > > m_vTextFramesForChaining ;
/// Current paragraph had at least one field in it.
bool m_bParaHadField;