diff options
Diffstat (limited to 'sw/source/filter/html/swhtml.cxx')
-rw-r--r-- | sw/source/filter/html/swhtml.cxx | 816 |
1 files changed, 410 insertions, 406 deletions
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx index 078dae27f0ac..dbbbc7cc0207 100644 --- a/sw/source/filter/html/swhtml.cxx +++ b/sw/source/filter/html/swhtml.cxx @@ -31,24 +31,24 @@ #include <o3tl/safeint.hxx> #include <rtl/ustrbuf.hxx> #include <svx/svxids.hrc> +#include <svx/svdotext.hxx> #if OSL_DEBUG_LEVEL > 0 #include <stdlib.h> #endif #include <hintids.hxx> +#include <utility> #include <vcl/errinf.hxx> #include <svl/stritem.hxx> #include <vcl/imap.hxx> #include <svtools/htmltokn.h> #include <svtools/htmlkywd.hxx> #include <svtools/ctrltool.hxx> -#include <unotools/configmgr.hxx> #include <unotools/pathoptions.hxx> #include <vcl/svapp.hxx> #include <sfx2/event.hxx> #include <sfx2/docfile.hxx> -#include <svtools/htmlcfg.hxx> #include <sfx2/linkmgr.hxx> #include <editeng/kernitem.hxx> #include <editeng/boxitem.hxx> @@ -117,6 +117,7 @@ #include <officecfg/Office/Writer.hxx> #include <comphelper/sequenceashashmap.hxx> #include <comphelper/sequence.hxx> +#include <officecfg/Office/Common.hxx> #include <swerror.h> #include <ndole.hxx> @@ -169,7 +170,7 @@ OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const // HTML import into Writer, avoid loading the Writer/Web template. return OUString(); - static const OUStringLiteral sTemplateWithoutExt(u"internal/html"); + static constexpr OUString sTemplateWithoutExt(u"internal/html"_ustr); SvtPathOptions aPathOpt; // first search for OpenDocument Writer/Web template @@ -203,7 +204,7 @@ bool HTMLReader::SetStrmStgPtr() } // Call for the general Reader-Interface -ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName ) +ErrCodeMsg HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName ) { SetupFilterOptions(); @@ -228,7 +229,7 @@ ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, co // so nobody steals the document! 
rtl::Reference<SwDoc> xHoldAlive(&rDoc); - ErrCode nRet = ERRCODE_NONE; + ErrCodeMsg nRet = ERRCODE_NONE; tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream, rName, rBaseURL, !m_bInsertMode, m_pMedium, IsReadUTF8(), @@ -244,7 +245,7 @@ ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, co + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos()))); // use the stream as transport for error number - nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr, + nRet = ErrCodeMsg( ERR_FORMAT_ROWCOL, sErr, DialogMask::ButtonsOk | DialogMask::MessageError ); } @@ -252,22 +253,21 @@ ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, co } SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn, - const OUString& rPath, - const OUString& rBaseURL, + OUString aPath, + OUString aBaseURL, bool bReadNewDoc, SfxMedium* pMed, bool bReadUTF8, bool bNoHTMLComments, const OUString& rNamespace ) : SfxHTMLParser( rIn, bReadNewDoc, pMed ), - m_aPathToFile( rPath ), - m_sBaseURL( rBaseURL ), + m_aPathToFile(std::move( aPath )), + m_sBaseURL(std::move( aBaseURL )), m_xAttrTab(std::make_shared<HTMLAttrTable>()), m_pNumRuleInfo( new SwHTMLNumRuleInfo ), m_xDoc( pD ), m_pActionViewShell( nullptr ), m_pSttNdIdx( nullptr ), m_pFormImpl( nullptr ), - m_pMarquee( nullptr ), m_pImageMap( nullptr ), m_nBaseFontStMin( 0 ), m_nFontStMin( 0 ), @@ -290,6 +290,7 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn, m_bSetModEnabled( false ), m_bInFloatingFrame( false ), m_bInField( false ), + m_bKeepUnknown( false ), m_bCallNextToken( false ), m_bIgnoreRawData( false ), m_bLBEntrySelected ( false ), @@ -311,11 +312,17 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn, m_bNotifyMacroEventRead( false ), m_isInTableStructure(false), m_nTableDepth( 0 ), + m_nFloatingFrames( 0 ), + m_nListItems( 0 ), m_pTempViewFrame(nullptr) { // If requested explicitly, then force 
ignoring of comments (don't create postits for them). - if (!utl::ConfigManager::IsFuzzing() && officecfg::Office::Writer::Filter::Import::HTML::IgnoreComments::get()) - m_bIgnoreHTMLComments = true; + if (!bFuzzing) + { + if (officecfg::Office::Writer::Filter::Import::HTML::IgnoreComments::get()) + m_bIgnoreHTMLComments = true; + m_bKeepUnknown = officecfg::Office::Common::Filter::HTML::Import::UnknownTag::get(); + } m_nEventId = nullptr; m_bUpperSpace = m_bViewCreated = m_bChkJumpMark = false; @@ -323,20 +330,25 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn, m_eScriptLang = HTMLScriptLanguage::Unknown; rCursor.DeleteMark(); - m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwIndexReg assert + m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwContentIndexReg assert memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable)); // Read the font sizes 1-7 from the INI file - SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get(); - m_aFontHeights[0] = rHtmlOptions.GetFontSize( 0 ) * 20; - m_aFontHeights[1] = rHtmlOptions.GetFontSize( 1 ) * 20; - m_aFontHeights[2] = rHtmlOptions.GetFontSize( 2 ) * 20; - m_aFontHeights[3] = rHtmlOptions.GetFontSize( 3 ) * 20; - m_aFontHeights[4] = rHtmlOptions.GetFontSize( 4 ) * 20; - m_aFontHeights[5] = rHtmlOptions.GetFontSize( 5 ) * 20; - m_aFontHeights[6] = rHtmlOptions.GetFontSize( 6 ) * 20; - - m_bKeepUnknown = rHtmlOptions.IsImportUnknown(); + if (!bFuzzing) + { + m_aFontHeights[0] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_1::get() * 20; + m_aFontHeights[1] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_2::get() * 20; + m_aFontHeights[2] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_3::get() * 20; + m_aFontHeights[3] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_4::get() * 20; + m_aFontHeights[4] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_5::get() * 20; + m_aFontHeights[5] = 
officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_6::get() * 20; + m_aFontHeights[6] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_7::get() * 20; + } + else + { + m_aFontHeights[0] = m_aFontHeights[1] = m_aFontHeights[2] = m_aFontHeights[3] = + m_aFontHeights[4] = m_aFontHeights[5] = m_aFontHeights[6] = 12 * 20; + } if(bReadNewDoc) { @@ -359,7 +371,8 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn, m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true); m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc())); - m_pCSS1Parser->SetIgnoreFontFamily( rHtmlOptions.IsIgnoreFontFamily() ); + if (!bFuzzing) + m_pCSS1Parser->SetIgnoreFontFamily( officecfg::Office::Common::Filter::HTML::Import::FontSetting::get() ); if( bReadUTF8 ) { @@ -519,7 +532,7 @@ SwHTMLParser::~SwHTMLParser() // the temporary view frame is hidden, so the hidden flag might need to be removed if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() ) - m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN ); + m_xDoc->GetDocShell()->GetMedium()->GetItemSet().ClearItem( SID_HIDDEN ); } } @@ -549,7 +562,7 @@ SvParserState SwHTMLParser::CallParser() m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false ); - *m_pSttNdIdx = pPos->nNode.GetIndex()-1; + *m_pSttNdIdx = pPos->GetNodeIndex()-1; m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false ); SwPaM aInsertionRangePam( *pPos ); @@ -595,7 +608,7 @@ SvParserState SwHTMLParser::CallParser() return eRet; } -bool SwHTMLParser::CanRemoveNode(sal_uLong nNodeIdx) const +bool SwHTMLParser::CanRemoveNode(SwNodeOffset nNodeIdx) const { const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1]; return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode()); @@ -739,29 +752,28 @@ void SwHTMLParser::Continue( HtmlTokenId nToken ) { 
const sal_Int32 nStt = pTextNode->GetText().getLength(); // when the cursor is still in the node, then set him at the end - if( m_pPam->GetPoint()->nNode == aNxtIdx ) + if( m_pPam->GetPoint()->GetNode() == aNxtIdx.GetNode() ) { - m_pPam->GetPoint()->nNode = *m_pSttNdIdx; - m_pPam->GetPoint()->nContent.Assign( pTextNode, nStt ); + m_pPam->GetPoint()->Assign( *pTextNode, nStt ); } #if OSL_DEBUG_LEVEL > 0 // !!! shouldn't be possible, or ?? - OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().nNode.GetIndex(), + OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().GetNodeIndex(), "Pam.Bound1 is still in the node" ); - OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).nNode.GetIndex(), + OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).GetNodeIndex(), "Pam.Bound2 is still in the node" ); - if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().nNode.GetIndex() ) + if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().GetNodeIndex() ) { - const sal_Int32 nCntPos = m_pPam->GetBound().nContent.GetIndex(); - m_pPam->GetBound().nContent.Assign( pTextNode, + const sal_Int32 nCntPos = m_pPam->GetBound().GetContentIndex(); + m_pPam->GetBound().SetContent( pTextNode->GetText().getLength() + nCntPos ); } - if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).nNode.GetIndex() ) + if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).GetNodeIndex() ) { - const sal_Int32 nCntPos = m_pPam->GetBound( false ).nContent.GetIndex(); - m_pPam->GetBound( false ).nContent.Assign( pTextNode, + const sal_Int32 nCntPos = m_pPam->GetBound( false ).GetContentIndex(); + m_pPam->GetBound( false ).SetContent( pTextNode->GetText().getLength() + nCntPos ); } #endif @@ -787,19 +799,19 @@ void SwHTMLParser::Continue( HtmlTokenId nToken ) // now remove the last useless paragraph SwPosition* pPos = m_pPam->GetPoint(); - if( !pPos->nContent.GetIndex() && !bLFStripped ) + if( !pPos->GetContentIndex() && !bLFStripped ) { SwTextNode* pCurrentNd; - 
sal_uLong nNodeIdx = pPos->nNode.GetIndex(); + SwNodeOffset nNodeIdx = pPos->GetNodeIndex(); bool bHasFlysOrMarks = HasCurrentParaFlys() || HasCurrentParaBookmarks( true ); if( IsNewDoc() ) { - if (!m_pPam->GetPoint()->nContent.GetIndex() && CanRemoveNode(nNodeIdx)) + if (!m_pPam->GetPoint()->GetContentIndex() && CanRemoveNode(nNodeIdx)) { - SwContentNode* pCNd = m_pPam->GetContentNode(); + SwContentNode* pCNd = m_pPam->GetPointContentNode(); if( pCNd && pCNd->StartOfSectionIndex()+2 < pCNd->EndOfSectionIndex() && !bHasFlysOrMarks ) { @@ -807,32 +819,36 @@ void SwHTMLParser::Continue( HtmlTokenId nToken ) SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh ); if( pCursorSh && pCursorSh->GetCursor()->GetPoint() - ->nNode.GetIndex() == nNodeIdx ) + ->GetNodeIndex() == nNodeIdx ) { pCursorSh->MovePara(GoPrevPara, fnParaEnd ); pCursorSh->SetMark(); pCursorSh->ClearMark(); } - m_pPam->GetBound().nContent.Assign( nullptr, 0 ); - m_pPam->GetBound(false).nContent.Assign( nullptr, 0 ); - m_xDoc->GetNodes().Delete( m_pPam->GetPoint()->nNode ); + SwNode& rDelNode = m_pPam->GetPoint()->GetNode(); + // move so we don't have a dangling SwContentIndex to the deleted node + m_pPam->GetPoint()->Adjust(SwNodeOffset(1)); + if (m_pPam->HasMark()) + m_pPam->GetMark()->Adjust(SwNodeOffset(1)); + m_xDoc->GetNodes().Delete( rDelNode ); } } } else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks ) { - if( pCurrentNd->CanJoinNext( &pPos->nNode )) + if( pCurrentNd->CanJoinNext( pPos )) { - SwTextNode* pNextNd = pPos->nNode.GetNode().GetTextNode(); - pPos->nContent.Assign( pNextNd, 0 ); + SwTextNode* pNextNd = pPos->GetNode().GetTextNode(); m_pPam->SetMark(); m_pPam->DeleteMark(); pNextNd->JoinPrev(); } else if (pCurrentNd->GetText().isEmpty()) { - pPos->nContent.Assign( nullptr, 0 ); m_pPam->SetMark(); m_pPam->DeleteMark(); - m_xDoc->GetNodes().Delete( pPos->nNode ); + SwNode& rDelNode = pPos->GetNode(); + // move so we don't have a 
dangling SwContentIndex to the deleted node + m_pPam->GetPoint()->Adjust(SwNodeOffset(+1)); + m_xDoc->GetNodes().Delete( rDelNode ); m_pPam->Move( fnMoveBackward ); } } @@ -841,10 +857,10 @@ void SwHTMLParser::Continue( HtmlTokenId nToken ) // annul the SplitNode from the beginning else if( !IsNewDoc() ) { - if( pPos->nContent.GetIndex() ) // then there was no <p> at the end + if( pPos->GetContentIndex() ) // then there was no <p> at the end m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next - SwTextNode* pTextNode = pPos->nNode.GetNode().GetTextNode(); - SwNodeIndex aPrvIdx( pPos->nNode ); + SwTextNode* pTextNode = pPos->GetNode().GetTextNode(); + SwNodeIndex aPrvIdx( pPos->GetNode() ); if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) && *m_pSttNdIdx <= aPrvIdx ) { @@ -861,9 +877,9 @@ void SwHTMLParser::Continue( HtmlTokenId nToken ) if( pPrev->HasSwAttrSet() ) pTextNode->SetAttr( *pPrev->GetpSwAttrSet() ); - if( &m_pPam->GetBound().nNode.GetNode() == pPrev ) + if( &m_pPam->GetBound().GetNode() == pPrev ) m_pPam->GetBound().nContent.Assign( pTextNode, 0 ); - if( &m_pPam->GetBound(false).nNode.GetNode() == pPrev ) + if( &m_pPam->GetBound(false).GetNode() == pPrev ) m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 ); pTextNode->JoinPrev(); @@ -871,7 +887,7 @@ void SwHTMLParser::Continue( HtmlTokenId nToken ) } // adjust AutoLoad in DocumentProperties - if (!utl::ConfigManager::IsFuzzing() && IsNewDoc()) + if (!bFuzzing && IsNewDoc()) { SwDocShell *pDocShell(m_xDoc->GetDocShell()); OSL_ENSURE(pDocShell, "no SwDocShell"); @@ -1255,7 +1271,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) { if( !m_bDocInitialized ) DocumentDetected(); - m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken ); + m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString()); // if there are temporary paragraph attributes and the // paragraph isn't empty then the paragraph attributes @@ -1450,8 +1466,13 @@ void 
SwHTMLParser::NextToken( HtmlTokenId nToken ) break; case HtmlTokenId::IFRAME_ON: - InsertFloatingFrame(); - m_bCallNextToken = m_bInFloatingFrame && m_xTable; + if (bFuzzing && m_nFloatingFrames > 64) + SAL_WARN("sw.html", "Not importing any more FloatingFrames for fuzzing performance"); + else + { + InsertFloatingFrame(); + m_bCallNextToken = m_bInFloatingFrame && m_xTable; + } break; case HtmlTokenId::LINEBREAK: @@ -1469,7 +1490,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) // CR in PRE/LISTING/XMP { if( HtmlTokenId::NEWPARA==nToken || - m_pPam->GetPoint()->nContent.GetIndex() ) + m_pPam->GetPoint()->GetContentIndex() ) { AppendTextNode(); // there is no LF at this place // therefore it will cause no problems @@ -1490,9 +1511,9 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) break; case HtmlTokenId::LINEFEEDCHAR: - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode(); - if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->nNode)) + if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->GetNode())) { NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK)); EndAttr( m_xAttrTab->pBreak, false ); @@ -1500,20 +1521,21 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) break; case HtmlTokenId::TEXTTOKEN: + case HtmlTokenId::CDATA: // insert string without spanning attributes at the end. - if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() ) + if (!aToken.isEmpty() && ' ' == aToken[0] && !IsReadPRE() && !GetPreserveSpaces()) { - sal_Int32 nPos = m_pPam->GetPoint()->nContent.GetIndex(); - const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr; + sal_Int32 nPos = m_pPam->GetPoint()->GetContentIndex(); + const SwTextNode* pTextNode = nPos ? 
m_pPam->GetPoint()->GetNode().GetTextNode() : nullptr; if (pTextNode) { const OUString& rText = pTextNode->GetText(); sal_Unicode cLast = rText[--nPos]; if( ' ' == cLast || '\x0a' == cLast) - aToken = aToken.copy(1); + aToken.remove(0, 1); } else - aToken = aToken.copy(1); + aToken.remove(0, 1); if( aToken.isEmpty() ) { @@ -1532,6 +1554,12 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) // The text token is inside an OLE object, which means // alternate text. SwOLENode* pOLENode = m_aEmbeds.top(); + if (!pOLENode) + { + // <object> is mapped to an image -> ignore. + break; + } + if (SwFlyFrameFormat* pFormat = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat())) { @@ -1543,7 +1571,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) } } - m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken ); + m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString()); // if there are temporary paragraph attributes and the // paragraph isn't empty then the paragraph attributes @@ -1753,10 +1781,10 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) if( m_nOpenParaToken != HtmlTokenId::NONE ) EndPara(); OSL_ENSURE(!m_xTable, "table in table not allowed here"); - if( !m_xTable && (IsNewDoc() || !m_pPam->GetNode().FindTableNode()) && - (m_pPam->GetPoint()->nNode.GetIndex() > + if( !m_xTable && (IsNewDoc() || !m_pPam->GetPointNode().FindTableNode()) && + (m_pPam->GetPoint()->GetNodeIndex() > m_xDoc->GetNodes().GetEndOfExtras().GetIndex() || - !m_pPam->GetNode().FindFootnoteStartNode() ) ) + !m_pPam->GetPointNode().FindFootnoteStartNode() ) ) { if ( m_nParaCnt < 5 ) Show(); // show what we have up to here @@ -1795,17 +1823,26 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) case HtmlTokenId::LI_ON: case HtmlTokenId::LISTHEADER_ON: if( m_nOpenParaToken != HtmlTokenId::NONE && - (m_pPam->GetPoint()->nContent.GetIndex() + (m_pPam->GetPoint()->GetContentIndex() || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) ) { // only finish 
paragraph for <P><LI>, not for <DD><LI> EndPara(); } - EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template - NewNumberBulletListItem( nToken ); - break; + if (bFuzzing && m_nListItems > 1024) + { + SAL_WARN("sw.html", "skipping remaining bullet import for performance during fuzzing"); + } + else + { + EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template + NewNumberBulletListItem( nToken ); + } + ++m_nListItems; + + break; case HtmlTokenId::LI_OFF: case HtmlTokenId::LISTHEADER_OFF: EndNumberBulletListItem( nToken, false ); @@ -2023,8 +2060,8 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) if( ' ' == aToken[ 3 ] && ' ' == aToken[ aToken.getLength()-3 ] ) { - OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) ); - InsertComment(comphelper::string::strip(aComment, ' ')); + std::u16string_view aComment( aToken.subView( 3, aToken.getLength()-5 ) ); + InsertComment(OUString(comphelper::string::strip(aComment, ' '))); } else { @@ -2111,7 +2148,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken ) // if there are temporary paragraph attributes and the // paragraph isn't empty then the paragraph attributes are final. - if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->nContent.GetIndex() ) + if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->GetContentIndex() ) m_aParaAttrs.clear(); } @@ -2163,12 +2200,12 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) m_aParaAttrs.clear(); SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ? - m_pPam->GetPoint()->nNode.GetNode().GetTextNode() : nullptr; + m_pPam->GetPoint()->GetNode().GetTextNode() : nullptr; if (pTextNode) { const SvxULSpaceItem& rULSpace = - static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE )); + pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ); bool bChange = AM_NOSPACE==eMode ? 
rULSpace.GetLower() > 0 : rULSpace.GetLower() == 0; @@ -2202,8 +2239,7 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) // split character attributes and maybe set none, // which are set for the whole paragraph - const SwNodeIndex& rEndIdx = aOldPos.nNode; - const sal_Int32 nEndCnt = aOldPos.nContent.GetIndex(); + const sal_Int32 nEndCnt = aOldPos.GetContentIndex(); const SwPosition& rPos = *m_pPam->GetPoint(); HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get()); @@ -2217,14 +2253,14 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) while( pAttr ) { HTMLAttr *pNext = pAttr->GetNext(); - if( pAttr->GetSttParaIdx() < rEndIdx.GetIndex() || + if( pAttr->GetStartParagraphIdx() < aOldPos.GetNodeIndex() || (!bWholePara && - pAttr->GetSttPara() == rEndIdx && - pAttr->GetSttCnt() != nEndCnt) ) + pAttr->GetStartParagraph() == aOldPos.GetNode() && + pAttr->GetStartContent() != nEndCnt) ) { bWholePara = - pAttr->GetSttPara() == rEndIdx && - pAttr->GetSttCnt() == 0; + pAttr->GetStartParagraph() == aOldPos.GetNode() && + pAttr->GetStartContent() == 0; sal_Int32 nStt = pAttr->m_nStartContent; bool bScript = false; @@ -2236,14 +2272,14 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) if( bScript ) { const SwTextNode *pTextNd = - pAttr->GetSttPara().GetNode().GetTextNode(); + pAttr->GetStartParagraph().GetNode().GetTextNode(); OSL_ENSURE( pTextNd, "No text node" ); if( pTextNd ) { const OUString& rText = pTextNd->GetText(); sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType( - rText, pAttr->GetSttCnt() ); + rText, pAttr->GetStartContent() ); sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter() ->endOfScript( rText, nStt, nScriptText ); while (nScriptEnd < nEndCnt && nScriptEnd != -1) @@ -2251,7 +2287,7 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) if( nScriptItem == nScriptText ) { HTMLAttr *pSetAttr = - pAttr->Clone( 
rEndIdx, nScriptEnd ); + pAttr->Clone( aOldPos.GetNode(), nScriptEnd ); pSetAttr->m_nStartContent = nStt; pSetAttr->ClearPrev(); if( !pNext || bWholePara ) @@ -2276,7 +2312,7 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) if( bInsert ) { HTMLAttr *pSetAttr = - pAttr->Clone( rEndIdx, nEndCnt ); + pAttr->Clone( aOldPos.GetNode(), nEndCnt ); pSetAttr->m_nStartContent = nStt; // When the attribute is for the whole paragraph, the outer @@ -2327,7 +2363,7 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) SetNodeNum( nLvl ); } else - m_pPam->GetNode().GetTextNode()->ResetAttr( RES_PARATR_NUMRULE ); + m_pPam->GetPointNode().GetTextNode()->ResetAttr( RES_PARATR_NUMRULE ); } // We must set the attribute of the paragraph before now (because of JavaScript) @@ -2335,7 +2371,7 @@ bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum ) // Now it is time to get rid of all script dependent hints that are // equal to the settings in the style - SwTextNode *pTextNd = rEndIdx.GetNode().GetTextNode(); + SwTextNode *pTextNd = aOldPos.GetNode().GetTextNode(); OSL_ENSURE( pTextNd, "There is the txt node" ); size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints()) ? 
pTextNd->GetSwpHints().Count() : 0; @@ -2462,14 +2498,14 @@ void SwHTMLParser::AddParSpace() m_bNoParSpace = false; - sal_uLong nNdIdx = m_pPam->GetPoint()->nNode.GetIndex() - 1; + SwNodeOffset nNdIdx = m_pPam->GetPoint()->GetNodeIndex() - 1; SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode(); if( !pTextNode ) return; SvxULSpaceItem rULSpace = - static_cast<const SvxULSpaceItem&>(pTextNode->SwContentNode::GetAttr( RES_UL_SPACE )); + pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ); if( rULSpace.GetLower() ) return; @@ -2558,7 +2594,7 @@ void SwHTMLParser::Show() // is the current node not visible anymore, then we use a bigger increment if( pVSh ) { - m_nParaCnt = (m_pPam->GetPoint()->nNode.GetNode().IsInVisibleArea(pVSh)) + m_nParaCnt = (m_pPam->GetPoint()->GetNode().IsInVisibleArea(pVSh)) ? 5 : 50; } } @@ -2643,15 +2679,12 @@ SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr ) if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) ) return m_pActionViewShell; - if( dynamic_cast< const SwEditShell *>( m_pActionViewShell ) != nullptr ) + if (SwEditShell* pEditShell = dynamic_cast<SwEditShell*>(m_pActionViewShell)) { // Already scrolled?, then make sure that the view doesn't move! 
const bool bOldLock = m_pActionViewShell->IsViewLocked(); m_pActionViewShell->LockView( true ); - const bool bOldEndActionByVirDev = m_pActionViewShell->IsEndActionByVirDev(); - m_pActionViewShell->SetEndActionByVirDev( true ); - static_cast<SwEditShell*>(m_pActionViewShell)->EndAction(); - m_pActionViewShell->SetEndActionByVirDev( bOldEndActionByVirDev ); + pEditShell->EndAction(); m_pActionViewShell->LockView( bOldLock ); // bChkJumpMark is only set when the object was also found @@ -2710,9 +2743,9 @@ void SwHTMLFrameFormatListener::Notify(const SfxHint& rHint) void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, std::deque<std::unique_ptr<HTMLAttr>> *pPostIts ) { - std::unique_ptr<SwPaM> pAttrPam( new SwPaM( *m_pPam->GetPoint() ) ); - const SwNodeIndex& rEndIdx = m_pPam->GetPoint()->nNode; - const sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex(); + SwPaM aAttrPam( *m_pPam->GetPoint() ); + const SwPosition& rEndPos = *m_pPam->GetPoint(); + const sal_Int32 nEndCnt = m_pPam->GetPoint()->GetContentIndex(); HTMLAttr* pAttr; SwContentNode* pCNd; @@ -2723,7 +2756,7 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, pAttr = m_aSetAttrTab[ --n ]; sal_uInt16 nWhich = pAttr->m_pItem->Which(); - sal_uLong nEndParaIdx = pAttr->GetEndParaIdx(); + SwNodeOffset nEndParaIdx = pAttr->GetEndParagraphIdx(); bool bSetAttr; if( bChkEnd ) { @@ -2733,23 +2766,23 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, // the whole paragraph, because they could be from a paragraph style // which can't be set. Because the attributes are inserted with // SETATTR_DONTREPLACE, they should be able to be set later. 
- bSetAttr = ( nEndParaIdx < rEndIdx.GetIndex() && - (RES_LR_SPACE != nWhich || !GetNumInfo().GetNumRule()) ) || + bSetAttr = ( nEndParaIdx < rEndPos.GetNodeIndex() && + ((RES_MARGIN_FIRSTLINE != nWhich && RES_MARGIN_TEXTLEFT != nWhich) || !GetNumInfo().GetNumRule()) ) || ( !pAttr->IsLikePara() && - nEndParaIdx == rEndIdx.GetIndex() && - pAttr->GetEndCnt() < nEndCnt && + nEndParaIdx == rEndPos.GetNodeIndex() && + pAttr->GetEndContent() < nEndCnt && (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) || ( bBeforeTable && - nEndParaIdx == rEndIdx.GetIndex() && - !pAttr->GetEndCnt() ); + nEndParaIdx == rEndPos.GetNodeIndex() && + !pAttr->GetEndContent() ); } else { // Attributes in body nodes array section shouldn't be set if we are in a // special nodes array section, but vice versa it's possible. - sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex(); - bSetAttr = nEndParaIdx < rEndIdx.GetIndex() || - rEndIdx.GetIndex() > nEndOfIcons || + SwNodeOffset nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex(); + bSetAttr = nEndParaIdx < rEndPos.GetNodeIndex() || + rEndPos.GetNodeIndex() > nEndOfIcons || nEndParaIdx <= nEndOfIcons; } @@ -2783,7 +2816,7 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, { // because of the awful deleting of nodes an index can also // point to an end node :-( - if ( (pAttr->GetSttPara() == pAttr->GetEndPara()) && + if ( (pAttr->GetStartParagraph() == pAttr->GetEndParagraph()) && !isTXTATR_NOEND(nWhich) ) { // when the end index also points to the node, we don't @@ -2792,7 +2825,7 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, pAttr = pPrev; continue; } - pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) ); + pCNd = SwNodes::GoNext(&(pAttr->m_nStartPara)); if( pCNd ) pAttr->m_nStartContent = 0; else @@ -2803,16 +2836,15 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, continue; } } - pAttrPam->GetPoint()->nNode = pAttr->m_nStartPara; // because of the deleting of 
BRs the start index can also // point behind the end the text if( pAttr->m_nStartContent > pCNd->Len() ) pAttr->m_nStartContent = pCNd->Len(); - pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nStartContent ); + aAttrPam.GetPoint()->Assign( *pCNd, pAttr->m_nStartContent ); - pAttrPam->SetMark(); - if ( (pAttr->GetSttPara() != pAttr->GetEndPara()) && + aAttrPam.SetMark(); + if ( (pAttr->GetStartParagraph() != pAttr->GetEndParagraph()) && !isTXTATR_NOEND(nWhich) ) { pCNd = pAttr->m_nEndPara.GetNode().GetContentNode(); @@ -2824,14 +2856,12 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, else { OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" ); - pAttrPam->DeleteMark(); + aAttrPam.DeleteMark(); delete pAttr; pAttr = pPrev; continue; } } - - pAttrPam->GetPoint()->nNode = pAttr->m_nEndPara; } else if( pAttr->IsLikePara() ) { @@ -2843,10 +2873,10 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, if( pAttr->m_nEndContent > pCNd->Len() ) pAttr->m_nEndContent = pCNd->Len(); - pAttrPam->GetPoint()->nContent.Assign( pCNd, pAttr->m_nEndContent ); + aAttrPam.GetPoint()->Assign( *pCNd, pAttr->m_nEndContent ); if( bBeforeTable && - pAttrPam->GetPoint()->nNode.GetIndex() == - rEndIdx.GetIndex() ) + aAttrPam.GetPoint()->GetNodeIndex() == + rEndPos.GetNodeIndex() ) { // If we're before inserting a table and the attribute ends // in the current node, then we must end it in the previous @@ -2854,16 +2884,16 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC && !isTXTATR_NOEND(nWhich) ) { - if( pAttrPam->GetMark()->nNode.GetIndex() != - rEndIdx.GetIndex() ) + if( aAttrPam.GetMark()->GetNodeIndex() != + rEndPos.GetNodeIndex() ) { - OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(), + OSL_ENSURE( !aAttrPam.GetPoint()->GetContentIndex(), "Content-Position before table not 0???" 
); - pAttrPam->Move( fnMoveBackward ); + aAttrPam.Move( fnMoveBackward ); } else { - pAttrPam->DeleteMark(); + aAttrPam.DeleteMark(); delete pAttr; pAttr = pPrev; continue; @@ -2879,11 +2909,11 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess(); IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName ); if( ppBkmk != pMarkAccess->getAllMarksEnd() && - (*ppBkmk)->GetMarkStart() == *pAttrPam->GetPoint() ) + (*ppBkmk)->GetMarkStart() == *aAttrPam.GetPoint() ) break; // do not generate duplicates on this position - pAttrPam->DeleteMark(); + aAttrPam.DeleteMark(); const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark( - *pAttrPam, + aAttrPam, sName, IDocumentMarkAccess::MarkType::BOOKMARK, ::sw::mark::InsertMode::New); @@ -2914,22 +2944,10 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, aFields.emplace_back( pAttr); } } - pAttrPam->DeleteMark(); + aAttrPam.DeleteMark(); pAttr = pPrev; continue; - case RES_LR_SPACE: - if( pAttrPam->GetPoint()->nNode.GetIndex() == - pAttrPam->GetMark()->nNode.GetIndex()) - { - // because of numbering set this attribute directly at node - pCNd->SetAttr( *pAttr->m_pItem ); - break; - } - OSL_ENSURE( false, - "LRSpace set over multiple paragraphs!" ); - [[fallthrough]]; // (shouldn't reach this point anyway) - // tdf#94088 expand RES_BACKGROUND to the new fill attribute // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST]. 
// This is the right place in the future if the adapted fill attributes @@ -2937,12 +2955,30 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, case RES_BACKGROUND: { const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem); - SfxItemSet aNewSet(m_xDoc->GetAttrPool(), svl::Items<XATTR_FILL_FIRST, XATTR_FILL_LAST>{}); + SfxItemSetFixed<XATTR_FILL_FIRST, XATTR_FILL_LAST> aNewSet(m_xDoc->GetAttrPool()); setSvxBrushItemAsFillAttributesToTargetSet(rBrush, aNewSet); - m_xDoc->getIDocumentContentOperations().InsertItemSet(*pAttrPam, aNewSet, SetAttrMode::DONTREPLACE); + m_xDoc->getIDocumentContentOperations().InsertItemSet(aAttrPam, aNewSet, SetAttrMode::DONTREPLACE); break; } + + case RES_LR_SPACE: + assert(false); + break; + + case RES_MARGIN_FIRSTLINE: + case RES_MARGIN_TEXTLEFT: + case RES_MARGIN_RIGHT: + if( aAttrPam.GetPoint()->GetNodeIndex() == + aAttrPam.GetMark()->GetNodeIndex()) + { + // because of numbering set this attribute directly at node + pCNd->SetAttr( *pAttr->m_pItem ); + break; + } + OSL_ENSURE( false, + "LRSpace set over multiple paragraphs!" 
); + [[fallthrough]]; // (shouldn't reach this point anyway) default: // maybe jump to a bookmark @@ -2954,9 +2990,9 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, m_eJumpTo = JumpToMarks::NONE; } - m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE ); + m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE ); } - pAttrPam->DeleteMark(); + aAttrPam.DeleteMark(); delete pAttr; pAttr = pPrev; @@ -2978,31 +3014,29 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor(); OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(), "Only At-Para flys need special handling" ); - const SwPosition *pFlyPos = rAnchor.GetContentAnchor(); - sal_uLong nFlyParaIdx = pFlyPos->nNode.GetIndex(); + SwNodeOffset nFlyParaIdx = rAnchor.GetAnchorNode()->GetIndex(); bool bMoveFly; if( bChkEnd ) { - bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() || - ( nFlyParaIdx == rEndIdx.GetIndex() && + bMoveFly = nFlyParaIdx < rEndPos.GetNodeIndex() || + ( nFlyParaIdx == rEndPos.GetNodeIndex() && m_aMoveFlyCnts[n] < nEndCnt ); } else { - sal_uLong nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex(); - bMoveFly = nFlyParaIdx < rEndIdx.GetIndex() || - rEndIdx.GetIndex() > nEndOfIcons || + SwNodeOffset nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex(); + bMoveFly = nFlyParaIdx < rEndPos.GetNodeIndex() || + rEndPos.GetNodeIndex() > nEndOfIcons || nFlyParaIdx <= nEndOfIcons; } if( bMoveFly ) { pFrameFormat->DelFrames(); - *pAttrPam->GetPoint() = *pFlyPos; - pAttrPam->GetPoint()->nContent.Assign( pAttrPam->GetContentNode(), - m_aMoveFlyCnts[n] ); + *aAttrPam.GetPoint() = *rAnchor.GetContentAnchor(); + aAttrPam.GetPoint()->SetContent( m_aMoveFlyCnts[n] ); SwFormatAnchor aAnchor( rAnchor ); aAnchor.SetType( RndStdIds::FLY_AT_CHAR ); - aAnchor.SetAnchor( pAttrPam->GetPoint() ); + aAnchor.SetAnchor( 
aAttrPam.GetPoint() ); pFrameFormat->SetFormatAttr( aAnchor ); const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient(); @@ -3028,20 +3062,19 @@ void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable, for (auto & field : aFields) { pCNd = field->m_nStartPara.GetNode().GetContentNode(); - pAttrPam->GetPoint()->nNode = field->m_nStartPara; - pAttrPam->GetPoint()->nContent.Assign( pCNd, field->m_nStartContent ); + aAttrPam.GetPoint()->Assign( *pCNd, field->m_nStartContent ); if( bBeforeTable && - pAttrPam->GetPoint()->nNode.GetIndex() == rEndIdx.GetIndex() ) + aAttrPam.GetPoint()->GetNodeIndex() == rEndPos.GetNodeIndex() ) { OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" ); - OSL_ENSURE( !pAttrPam->GetPoint()->nContent.GetIndex(), + OSL_ENSURE( !aAttrPam.GetPoint()->GetContentIndex(), "Content-Position before table not 0???" ); // !!! - pAttrPam->Move( fnMoveBackward ); + aAttrPam.Move( fnMoveBackward ); } - m_xDoc->getIDocumentContentOperations().InsertPoolItem( *pAttrPam, *field->m_pItem ); + m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *field->m_pItem ); field.reset(); } @@ -3074,8 +3107,8 @@ bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty ) OSL_ENSURE( ppHead, "No list header attribute found!" ); // save the current position as end position - const SwNodeIndex* pEndIdx = &m_pPam->GetPoint()->nNode; - sal_Int32 nEndCnt = m_pPam->GetPoint()->nContent.GetIndex(); + const SwPosition* pEndPos = m_pPam->GetPoint(); + sal_Int32 nEndCnt = m_pPam->GetPoint()->GetContentIndex(); // Is the last started or an earlier started attribute being ended? 
HTMLAttr *pLast = nullptr; @@ -3096,11 +3129,11 @@ bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty ) bool bMoveBack = false; sal_uInt16 nWhich = pAttr->m_pItem->Which(); if( !nEndCnt && RES_PARATR_BEGIN <= nWhich && - *pEndIdx != pAttr->GetSttPara() ) + pEndPos->GetNodeIndex() != pAttr->GetStartParagraph().GetIndex() ) { // Then move back one position in the content! bMoveBack = m_pPam->Move( fnMoveBackward ); - nEndCnt = m_pPam->GetPoint()->nContent.GetIndex(); + nEndCnt = m_pPam->GetPoint()->GetContentIndex(); } // now end the attribute @@ -3112,12 +3145,12 @@ bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty ) // does it have a non-empty range? if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) || RES_PAGEDESC == nWhich || RES_BREAK == nWhich || - *pEndIdx != pAttr->GetSttPara() || - nEndCnt != pAttr->GetSttCnt() ) + pEndPos->GetNodeIndex() != pAttr->GetStartParagraph().GetIndex() || + nEndCnt != pAttr->GetStartContent() ) { bInsert = true; // We do some optimization for script dependent attributes here. - if( *pEndIdx == pAttr->GetSttPara() ) + if( pEndPos->GetNodeIndex() == pAttr->GetStartParagraph().GetIndex() ) { lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem ); } @@ -3128,21 +3161,21 @@ bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty ) } const SwTextNode *pTextNd = (bInsert && bScript) ? 
- pAttr->GetSttPara().GetNode().GetTextNode() : + pAttr->GetStartParagraph().GetNode().GetTextNode() : nullptr; if (pTextNd) { const OUString& rText = pTextNd->GetText(); sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType( - rText, pAttr->GetSttCnt() ); + rText, pAttr->GetStartContent() ); sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter() - ->endOfScript( rText, pAttr->GetSttCnt(), nScriptText ); + ->endOfScript( rText, pAttr->GetStartContent(), nScriptText ); while (nScriptEnd < nEndCnt && nScriptEnd != -1) { if( nScriptItem == nScriptText ) { - HTMLAttr *pSetAttr = pAttr->Clone( *pEndIdx, nScriptEnd ); + HTMLAttr *pSetAttr = pAttr->Clone( pEndPos->GetNode(), nScriptEnd ); pSetAttr->ClearPrev(); if( pNext ) pNext->InsertPrev( pSetAttr ); @@ -3164,7 +3197,7 @@ bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty ) } if( bInsert ) { - pAttr->m_nEndPara = *pEndIdx; + pAttr->m_nEndPara = pEndPos->GetNode(); pAttr->m_nEndContent = nEndCnt; pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich && RES_TXTATR_CHARFMT != nWhich; @@ -3319,20 +3352,19 @@ void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttr "Danger: there are non-final paragraph attributes"); m_aParaAttrs.clear(); - const SwNodeIndex& nSttIdx = m_pPam->GetPoint()->nNode; - SwNodeIndex nEndIdx( nSttIdx ); + SwNodeIndex nEndIdx( m_pPam->GetPoint()->GetNode() ); // close all still open attributes and re-open them after the table HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get()); HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get()); bool bSetAttr = true; - const sal_Int32 nSttCnt = m_pPam->GetPoint()->nContent.GetIndex(); + const sal_Int32 nSttCnt = m_pPam->GetPoint()->GetContentIndex(); sal_Int32 nEndCnt = nSttCnt; if( bMoveEndBack ) { - sal_uLong nOldEnd = nEndIdx.GetIndex(); - sal_uLong nTmpIdx; + SwNodeOffset nOldEnd = nEndIdx.GetIndex(); + SwNodeOffset nTmpIdx; if( ( nTmpIdx = 
m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd || ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd ) { @@ -3355,15 +3387,15 @@ void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttr HTMLAttr *pPrev = pAttr->GetPrev(); if( bSetAttr && - ( pAttr->GetSttParaIdx() < nEndIdx.GetIndex() || - (pAttr->GetSttPara() == nEndIdx && - pAttr->GetSttCnt() != nEndCnt) ) ) + ( pAttr->GetStartParagraphIdx() < nEndIdx.GetIndex() || + (pAttr->GetStartParagraph() == nEndIdx && + pAttr->GetStartContent() != nEndCnt) ) ) { // The attribute must be set before the list. We need the // original and therefore we clone it, because pointer to the // attribute exist in the other contexts. The Next-List is lost // in doing so, but the Previous-List is preserved. - HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx, nEndCnt ); + HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx.GetNode(), nEndCnt ); if( pNext ) pNext->InsertPrev( pSetAttr ); @@ -3391,7 +3423,7 @@ void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttr } // set the start of the attribute anew and break link - pAttr->Reset(nSttIdx, nSttCnt, pSaveAttributes, rNewAttrTab); + pAttr->Reset(m_pPam->GetPoint()->GetNode(), nSttCnt, pSaveAttributes, rNewAttrTab); if (*pSaveAttributes) { @@ -3605,7 +3637,7 @@ void SwHTMLParser::NewBasefontAttr() switch( rOption.GetToken() ) { case HtmlOptionId::SIZE: - nSize = static_cast<sal_uInt16>(rOption.GetNumber()); + nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber()); break; case HtmlOptionId::ID: aId = rOption.GetString(); @@ -3721,7 +3753,7 @@ void SwHTMLParser::NewFontAttr( HtmlTokenId nToken ) else if( nSSize > 7 ) nSSize = 7; - nSize = static_cast<sal_uInt16>(nSSize); + nSize = o3tl::narrowing<sal_uInt16>(nSSize); nFontHeight = m_aFontHeights[nSize-1]; } break; @@ -3914,7 +3946,7 @@ void SwHTMLParser::EndFontAttr( HtmlTokenId nToken ) void SwHTMLParser::NewPara() { - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( 
m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( AM_SPACE ); else AddParSpace(); @@ -3946,6 +3978,11 @@ void SwHTMLParser::NewPara() case HtmlOptionId::DIR: aDir = rOption.GetString(); break; + case HtmlOptionId::XML_SPACE: + if (rOption.GetString() == "preserve") + SetPreserveSpaces(true); + break; + default: break; } } @@ -3969,6 +4006,9 @@ void SwHTMLParser::NewPara() "Class is not considered" ); DoPositioning( aItemSet, aPropInfo, xCntxt.get() ); InsertAttrs( aItemSet, aPropInfo, xCntxt.get() ); + + if (aPropInfo.m_bPreserveSpace) + SetPreserveSpaces(true); } } @@ -3993,15 +4033,16 @@ void SwHTMLParser::EndPara( bool bReal ) if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable) { #if OSL_DEBUG_LEVEL > 0 - const SwNumRule *pNumRule = m_pPam->GetNode().GetTextNode()->GetNumRule(); + const SwNumRule *pNumRule = m_pPam->GetPointNode().GetTextNode()->GetNumRule(); OSL_ENSURE( pNumRule, "Where is the NumRule" ); #endif } - // Netscape skips empty paragraphs, we do the same. + // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping + // the source document to the doc model 1:1 if possible. 
if( bReal ) { - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if (m_pPam->GetPoint()->GetContentIndex() || m_bXHTML) AppendTextNode( AM_SPACE ); else AddParSpace(); @@ -4033,6 +4074,7 @@ void SwHTMLParser::EndPara( bool bReal ) SetTextCollAttrs(); m_nOpenParaToken = HtmlTokenId::NONE; + SetPreserveSpaces(false); } void SwHTMLParser::NewHeading( HtmlTokenId nToken ) @@ -4070,7 +4112,7 @@ void SwHTMLParser::NewHeading( HtmlTokenId nToken ) } // open a new paragraph - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( AM_SPACE ); else AddParSpace(); @@ -4124,7 +4166,7 @@ void SwHTMLParser::NewHeading( HtmlTokenId nToken ) void SwHTMLParser::EndHeading() { // open a new paragraph - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( AM_SPACE ); else AddParSpace(); @@ -4219,7 +4261,7 @@ void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl ) OSL_ENSURE( false, "unknown style" ); break; } - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( eMode ); else if( AM_SPACE==eMode ) AddParSpace(); @@ -4272,7 +4314,7 @@ void SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken ) OSL_ENSURE( false, "unknown style" ); break; } - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( eMode ); else if( AM_SPACE==eMode ) AddParSpace(); @@ -4323,7 +4365,7 @@ void SwHTMLParser::NewDefList() // open a new paragraph bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0; - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( bSpace ? 
AM_SPACE : AM_SOFTNOSPACE ); else if( bSpace ) AddParSpace(); @@ -4367,10 +4409,10 @@ void SwHTMLParser::NewDefList() { // and the one of the DT-style of the current level - SvxLRSpaceItem rLRSpace = + SvxTextLeftMarginItem const& rTextLeftMargin = m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString()) - ->GetLRSpace(); - nLeft = nLeft + static_cast< sal_uInt16 >(rLRSpace.GetTextLeft()); + ->GetTextLeftMargin(); + nLeft = nLeft + static_cast<sal_uInt16>(rTextLeftMargin.GetTextLeft()); } xCntxt->SetMargins( nLeft, nRight, nIndent ); @@ -4398,7 +4440,7 @@ void SwHTMLParser::NewDefList() void SwHTMLParser::EndDefList() { bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1; - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE ); else if( bSpace ) AddParSpace(); @@ -4461,7 +4503,7 @@ void SwHTMLParser::NewDefListItem( HtmlTokenId nToken ) void SwHTMLParser::EndDefListItem( HtmlTokenId nToken ) { // open a new paragraph - if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->nContent.GetIndex() ) + if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( AM_SOFTNOSPACE ); // search context matching the token and fetch it from stack @@ -4514,14 +4556,12 @@ void SwHTMLParser::EndDefListItem( HtmlTokenId nToken ) bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly, bool bSurroundOnly ) const { - SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode; + SwNode& rNode = m_pPam->GetPoint()->GetNode(); - const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats(); bool bFound = false; - for ( size_t i=0; i<rFrameFormatTable.size(); i++ ) + for(sw::SpzFrameFormat* pFormat: *m_xDoc->GetSpzFrameFormats()) { - const SwFrameFormat *const pFormat = rFrameFormatTable[i]; SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor(); // A frame was found, when // - it is paragraph-bound, and @@ -4529,11 +4569,11 @@ bool 
SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly, // - every paragraph-bound frame counts, or // - (only frames without wrapping count and) the frame doesn't have // a wrapping - SwPosition const*const pAPos = pAnchor->GetContentAnchor(); - if (pAPos && + SwNode const*const pAnchorNode = pAnchor->GetAnchorNode(); + if (pAnchorNode && ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) || (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) && - pAPos->nNode == rNodeIdx ) + *pAnchorNode == rNode ) { if( !(bNoSurroundOnly || bSurroundOnly) ) { @@ -4579,7 +4619,7 @@ bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly, const SwFormatColl *SwHTMLParser::GetCurrFormatColl() const { - const SwContentNode* pCNd = m_pPam->GetContentNode(); + const SwContentNode* pCNd = m_pPam->GetPointContentNode(); return pCNd ? &pCNd->GetAnyFormatColl() : nullptr; } @@ -4596,7 +4636,14 @@ void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ) sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and short nFirstLineIndent = 0; // indentations - for( auto i = m_nContextStAttrMin; i < m_aContexts.size(); ++i ) + auto nDepth = m_aContexts.size(); + if (bFuzzing && nDepth > 128) + { + SAL_WARN("sw.html", "Not applying any more text collection attributes to a deeply nested node for fuzzing performance"); + nDepth = 0; + } + + for (auto i = m_nContextStAttrMin; i < nDepth; ++i) { const HTMLAttrContext *pCntxt = m_aContexts[i].get(); @@ -4673,7 +4720,7 @@ void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ) else { // Maybe a default style exists? 
- nColl = pCntxt->GetDfltTextFormatColl(); + nColl = pCntxt->GetDefaultTextFormatColl(); if( nColl ) nDfltColl = nColl; } @@ -4702,24 +4749,20 @@ void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ) const SwTextFormatColl *pTopColl = m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass ); const SfxItemSet& rItemSet = pTopColl->GetAttrSet(); - const SfxPoolItem *pItem; - if( SfxItemState::SET == rItemSet.GetItemState(RES_LR_SPACE,true, &pItem) ) + if (rItemSet.GetItemIfSet(RES_MARGIN_FIRSTLINE) + || rItemSet.GetItemIfSet(RES_MARGIN_TEXTLEFT) + || rItemSet.GetItemIfSet(RES_MARGIN_RIGHT)) { - const SvxLRSpaceItem *pLRItem = - static_cast<const SvxLRSpaceItem *>(pItem); - - sal_Int32 nLeft = pLRItem->GetTextLeft(); - sal_Int32 nRight = pLRItem->GetRight(); - nFirstLineIndent = pLRItem->GetTextFirstLineOffset(); + sal_Int32 nLeft = rItemSet.Get(RES_MARGIN_TEXTLEFT).GetTextLeft(); + sal_Int32 nRight = rItemSet.Get(RES_MARGIN_RIGHT).GetRight(); + nFirstLineIndent = rItemSet.Get(RES_MARGIN_FIRSTLINE).GetTextFirstLineOffset(); // In Definition lists the margins also contain the margins from the previous levels if( RES_POOLCOLL_HTML_DD == nTopColl ) { - const SvxLRSpaceItem& rDTLRSpace = m_pCSS1Parser - ->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString()) - ->GetLRSpace(); - nLeft -= rDTLRSpace.GetTextLeft(); - nRight -= rDTLRSpace.GetRight(); + auto const*const pColl(m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString())); + nLeft -= pColl->GetTextLeftMargin().GetTextLeft(); + nRight -= pColl->GetRightMargin().GetRight(); } else if( RES_POOLCOLL_HTML_DT == nTopColl ) { @@ -4734,10 +4777,8 @@ void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ) pContext->SetMargins( nLeftMargin, nRightMargin, nFirstLineIndent ); } - if( SfxItemState::SET == rItemSet.GetItemState(RES_UL_SPACE,true, &pItem) ) + if( const SvxULSpaceItem* pULItem = rItemSet.GetItemIfSet(RES_UL_SPACE) ) { - const SvxULSpaceItem *pULItem = - static_cast<const SvxULSpaceItem 
*>(pItem); pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() ); } } @@ -4746,13 +4787,18 @@ void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ) if( !pCollToSet ) { pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl ); - const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace(); if( !nLeftMargin ) - nLeftMargin = static_cast< sal_uInt16 >(rLRItem.GetTextLeft()); + { + nLeftMargin = static_cast<sal_uInt16>(pCollToSet->GetTextLeftMargin().GetTextLeft()); + } if( !nRightMargin ) - nRightMargin = static_cast< sal_uInt16 >(rLRItem.GetRight()); + { + nRightMargin = static_cast<sal_uInt16>(pCollToSet->GetRightMargin().GetRight()); + } if( !nFirstLineIndent ) - nFirstLineIndent = rLRItem.GetTextFirstLineOffset(); + { + nFirstLineIndent = pCollToSet->GetFirstLineIndent().GetTextFirstLineOffset(); + } } // remove previous hard attribution of paragraph @@ -4764,25 +4810,41 @@ void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext ) m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet ); // if applicable correct the paragraph indent - const SvxLRSpaceItem& rLRItem = pCollToSet->GetLRSpace(); - bool bSetLRSpace = nLeftMargin != rLRItem.GetTextLeft() || - nFirstLineIndent != rLRItem.GetTextFirstLineOffset() || - nRightMargin != rLRItem.GetRight(); + const SvxFirstLineIndentItem & rFirstLine = pCollToSet->GetFirstLineIndent(); + const SvxTextLeftMarginItem & rTextLeftMargin = pCollToSet->GetTextLeftMargin(); + const SvxRightMarginItem & rRightMargin = pCollToSet->GetRightMargin(); + bool bSetLRSpace = nLeftMargin != rTextLeftMargin.GetTextLeft() || + nFirstLineIndent != rFirstLine.GetTextFirstLineOffset() || + nRightMargin != rRightMargin.GetRight(); if( bSetLRSpace ) { - SvxLRSpaceItem aLRItem( rLRItem ); - aLRItem.SetTextLeft( nLeftMargin ); - aLRItem.SetRight( nRightMargin ); - aLRItem.SetTextFirstLineOffset( nFirstLineIndent ); + SvxFirstLineIndentItem firstLine(rFirstLine); + SvxTextLeftMarginItem leftMargin(rTextLeftMargin); + 
SvxRightMarginItem rightMargin(rRightMargin); + firstLine.SetTextFirstLineOffset(nFirstLineIndent); + leftMargin.SetTextLeft(nLeftMargin); + rightMargin.SetRight(nRightMargin); if( pItemSet ) - pItemSet->Put( aLRItem ); + { + pItemSet->Put(firstLine); + pItemSet->Put(leftMargin); + pItemSet->Put(rightMargin); + } else { - NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem); - m_xAttrTab->pLRSpace->SetLikePara(); - m_aParaAttrs.push_back( m_xAttrTab->pLRSpace ); - EndAttr( m_xAttrTab->pLRSpace, false ); + NewAttr(m_xAttrTab, &m_xAttrTab->pFirstLineIndent, firstLine); + m_xAttrTab->pFirstLineIndent->SetLikePara(); + m_aParaAttrs.push_back(m_xAttrTab->pFirstLineIndent); + EndAttr(m_xAttrTab->pFirstLineIndent, false); + NewAttr(m_xAttrTab, &m_xAttrTab->pTextLeftMargin, leftMargin); + m_xAttrTab->pTextLeftMargin->SetLikePara(); + m_aParaAttrs.push_back(m_xAttrTab->pTextLeftMargin); + EndAttr(m_xAttrTab->pTextLeftMargin, false); + NewAttr(m_xAttrTab, &m_xAttrTab->pRightMargin, rightMargin); + m_xAttrTab->pRightMargin->SetLikePara(); + m_aParaAttrs.push_back(m_xAttrTab->pRightMargin); + EndAttr(m_xAttrTab->pRightMargin, false); } } @@ -4908,8 +4970,7 @@ void SwHTMLParser::InsertSpacer() // create an empty text frame // fetch the ItemSet - SfxItemSet aFrameSet( m_xDoc->GetAttrPool(), - svl::Items<RES_FRMATR_BEGIN, RES_FRMATR_END-1>{} ); + SfxItemSetFixed<RES_FRMATR_BEGIN, RES_FRMATR_END-1> aFrameSet( m_xDoc->GetAttrPool() ); if( !IsNewDoc() ) Reader::ResetFrameFormatAttrs( aFrameSet ); @@ -4944,23 +5005,18 @@ void SwHTMLParser::InsertSpacer() case HTML_SPTYPE_VERT: if( nSize > 0 ) { - if (Application::GetDefaultDevice()) - { - nSize = Application::GetDefaultDevice() - ->PixelToLogic( Size(0,nSize), - MapMode(MapUnit::MapTwip) ).Height(); - } + nSize = o3tl::convert(nSize, o3tl::Length::px, o3tl::Length::twip); // set a paragraph margin SwTextNode *pTextNode = nullptr; - if( !m_pPam->GetPoint()->nContent.GetIndex() ) + if( !m_pPam->GetPoint()->GetContentIndex() ) { // if 
possible change the bottom paragraph margin // of previous node SetAttr(); // set still open paragraph attributes - pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->nNode.GetIndex()-1] + pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->GetNodeIndex()-1] ->GetTextNode(); // If the previous paragraph isn't a text node, then now an @@ -4972,14 +5028,13 @@ void SwHTMLParser::InsertSpacer() if( pTextNode ) { - SvxULSpaceItem aULSpace( static_cast<const SvxULSpaceItem&>(pTextNode - ->SwContentNode::GetAttr( RES_UL_SPACE )) ); - aULSpace.SetLower( aULSpace.GetLower() + static_cast<sal_uInt16>(nSize) ); + SvxULSpaceItem aULSpace( pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ) ); + aULSpace.SetLower( aULSpace.GetLower() + o3tl::narrowing<sal_uInt16>(nSize) ); pTextNode->SetAttr( aULSpace ); } else { - NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, static_cast<sal_uInt16>(nSize), RES_UL_SPACE)); + NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, o3tl::narrowing<sal_uInt16>(nSize), RES_UL_SPACE)); EndAttr( m_xAttrTab->pULSpace, false ); AppendTextNode(); // Don't change spacing! @@ -4992,14 +5047,9 @@ void SwHTMLParser::InsertSpacer() // If the paragraph is still empty, set first line // indentation, otherwise apply letter spacing over a space. 
- if (Application::GetDefaultDevice()) - { - nSize = Application::GetDefaultDevice() - ->PixelToLogic( Size(nSize,0), - MapMode(MapUnit::MapTwip) ).Width(); - } + nSize = o3tl::convert(nSize, o3tl::Length::px, o3tl::Length::twip); - if( !m_pPam->GetPoint()->nContent.GetIndex() ) + if( !m_pPam->GetPoint()->GetContentIndex() ) { sal_uInt16 nLeft=0, nRight=0; short nIndent = 0; @@ -5007,13 +5057,16 @@ void SwHTMLParser::InsertSpacer() GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent ); nIndent = nIndent + static_cast<short>(nSize); - SvxLRSpaceItem aLRItem( RES_LR_SPACE ); - aLRItem.SetTextLeft( nLeft ); - aLRItem.SetRight( nRight ); - aLRItem.SetTextFirstLineOffset( nIndent ); + SvxFirstLineIndentItem const firstLine(nIndent, RES_MARGIN_FIRSTLINE); + SvxTextLeftMarginItem const leftMargin(nLeft, RES_MARGIN_TEXTLEFT); + SvxRightMarginItem const rightMargin(nRight, RES_MARGIN_RIGHT); - NewAttr(m_xAttrTab, &m_xAttrTab->pLRSpace, aLRItem); - EndAttr( m_xAttrTab->pLRSpace, false ); + NewAttr(m_xAttrTab, &m_xAttrTab->pFirstLineIndent, firstLine); + EndAttr(m_xAttrTab->pFirstLineIndent, false); + NewAttr(m_xAttrTab, &m_xAttrTab->pTextLeftMargin, leftMargin); + EndAttr(m_xAttrTab->pTextLeftMargin, false); + NewAttr(m_xAttrTab, &m_xAttrTab->pRightMargin, rightMargin); + EndAttr(m_xAttrTab->pRightMargin, false); } else { @@ -5027,14 +5080,8 @@ void SwHTMLParser::InsertSpacer() sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel ) { - if( nPixel && Application::GetDefaultDevice() ) - { - SwTwips nTwips = Application::GetDefaultDevice()->PixelToLogic( - Size( nPixel, nPixel ), MapMode( MapUnit::MapTwip ) ).Width(); - return static_cast<sal_uInt16>(std::min(nTwips, SwTwips(SAL_MAX_UINT16))); - } - else - return nPixel; + return std::min(o3tl::convert(nPixel, o3tl::Length::px, o3tl::Length::twip), + sal_Int64(SAL_MAX_UINT16)); } SwTwips SwHTMLParser::GetCurrentBrowseWidth() @@ -5082,21 +5129,8 @@ void SwHTMLParser::InsertIDOption() void SwHTMLParser::InsertLineBreak() 
{ - // <BR CLEAR=xxx> is handled as: - // 1.) Only regard the paragraph-bound frames anchored in current paragraph. - // 2.) For left-justified aligned frames, CLEAR=LEFT or ALL, and for right- - // justified aligned frames, CLEAR=RIGHT or ALL, the wrap-through is - // changed as following: - // 3.) If the paragraph contains no text, then the frames don't get a wrapping - // 4.) otherwise a left aligned frame gets a right "only anchor" wrapping - // and a right aligned frame gets a left "only anchor" wrapping. - // 5.) if in a non-empty paragraph the wrapping of a frame is changed, - // then a new paragraph is opened - // 6.) If no wrappings of frames are changed, a hard line break is inserted. - OUString aId, aStyle, aClass; // the id of bookmark - bool bClearLeft = false, bClearRight = false; - bool bCleared = false; // Was a CLEAR executed? + SwLineBreakClear eClear = SwLineBreakClear::NONE; // then we fetch the options const HTMLOptions& rHTMLOptions = GetOptions(); @@ -5110,13 +5144,16 @@ void SwHTMLParser::InsertLineBreak() const OUString &rClear = rOption.GetString(); if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) ) { - bClearLeft = true; - bClearRight = true; + eClear = SwLineBreakClear::ALL; } else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) ) - bClearLeft = true; + { + eClear = SwLineBreakClear::LEFT; + } else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) ) - bClearRight = true; + { + eClear = SwLineBreakClear::RIGHT; // CLEAR=right clears the right side + } } break; case HtmlOptionId::ID: @@ -5132,57 +5169,6 @@ void SwHTMLParser::InsertLineBreak() } } - // CLEAR is only supported for the current paragraph - if( bClearLeft || bClearRight ) - { - SwNodeIndex& rNodeIdx = m_pPam->GetPoint()->nNode; - SwTextNode* pTextNd = rNodeIdx.GetNode().GetTextNode(); - if( pTextNd ) - { - const SwFrameFormats& rFrameFormatTable = *m_xDoc->GetSpzFrameFormats(); - - for( size_t i=0; i<rFrameFormatTable.size(); i++ ) - { - SwFrameFormat *const
pFormat = rFrameFormatTable[i]; - SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor(); - SwPosition const*const pAPos = pAnchor->GetContentAnchor(); - if (pAPos && - ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) || - (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) && - pAPos->nNode == rNodeIdx && - pFormat->GetSurround().GetSurround() != css::text::WrapTextMode_NONE ) - { - sal_Int16 eHori = RES_DRAWFRMFMT == pFormat->Which() - ? text::HoriOrientation::LEFT - : pFormat->GetHoriOrient().GetHoriOrient(); - - css::text::WrapTextMode eSurround = css::text::WrapTextMode_PARALLEL; - if( m_pPam->GetPoint()->nContent.GetIndex() ) - { - if( bClearLeft && text::HoriOrientation::LEFT==eHori ) - eSurround = css::text::WrapTextMode_RIGHT; - else if( bClearRight && text::HoriOrientation::RIGHT==eHori ) - eSurround = css::text::WrapTextMode_LEFT; - } - else if( (bClearLeft && text::HoriOrientation::LEFT==eHori) || - (bClearRight && text::HoriOrientation::RIGHT==eHori) ) - { - eSurround = css::text::WrapTextMode_NONE; - } - - if( css::text::WrapTextMode_PARALLEL != eSurround ) - { - SwFormatSurround aSurround( eSurround ); - if( css::text::WrapTextMode_NONE != eSurround ) - aSurround.SetAnchorOnly( true ); - pFormat->SetFormatAttr( aSurround ); - bCleared = true; - } - } - } - } - } - // parse styles std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK)); bool bBreakItem = false; @@ -5209,12 +5195,26 @@ void SwHTMLParser::InsertLineBreak() EndAttr( m_xAttrTab->pBreak, false ); } - if( !bCleared && !bBreakItem ) + if (!bBreakItem) { - // If no CLEAR could or should be executed, a line break will be inserted - m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, "\x0A" ); + if (eClear == SwLineBreakClear::NONE) + { + // If no CLEAR could or should be executed, a line break will be inserted + m_xDoc->getIDocumentContentOperations().InsertString(*m_pPam, "\x0A"); + } + else + { + // <BR CLEAR=xxx> is 
mapped to an SwFormatLineBreak. + SwTextNode* pTextNode = m_pPam->GetPointNode().GetTextNode(); + if (pTextNode) + { + SwFormatLineBreak aLineBreak(eClear); + sal_Int32 nPos = m_pPam->GetPoint()->GetContentIndex(); + pTextNode->InsertItem(aLineBreak, nPos, nPos); + } + } } - else if( m_pPam->GetPoint()->nContent.GetIndex() ) + else if( m_pPam->GetPoint()->GetContentIndex() ) { // If a CLEAR is executed in a non-empty paragraph, then after it // a new paragraph has to be opened. @@ -5255,11 +5255,11 @@ void SwHTMLParser::InsertHorzRule() aId = rOption.GetString(); break; case HtmlOptionId::SIZE: - nSize = static_cast<sal_uInt16>(rOption.GetNumber()); + nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber()); break; case HtmlOptionId::WIDTH: bPercentWidth = (rOption.GetString().indexOf('%') != -1); - nWidth = static_cast<sal_uInt16>(rOption.GetNumber()); + nWidth = o3tl::narrowing<sal_uInt16>(rOption.GetNumber()); if( bPercentWidth && nWidth>=100 ) { // the default case are 100% lines (no attributes necessary) @@ -5281,7 +5281,7 @@ void SwHTMLParser::InsertHorzRule() } } - if( m_pPam->GetPoint()->nContent.GetIndex() ) + if( m_pPam->GetPoint()->GetContentIndex() ) AppendTextNode( AM_NOSPACE ); if( m_nOpenParaToken != HtmlTokenId::NONE ) EndPara(); @@ -5320,12 +5320,12 @@ void SwHTMLParser::InsertHorzRule() } else if( bNoShade ) { - aBorderLine.SetWidth( DEF_LINE_WIDTH_2 ); + aBorderLine.SetWidth( SvxBorderLineWidth::Medium ); } else { aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE); - aBorderLine.SetWidth( DEF_LINE_WIDTH_0 ); + aBorderLine.SetWidth(SvxBorderLineWidth::Hairline); } SvxBoxItem aBoxItem(RES_BOX); @@ -5343,35 +5343,44 @@ void SwHTMLParser::InsertHorzRule() { // fake length and alignment of line above paragraph indents tools::Long nBrowseWidth = GetCurrentBrowseWidth(); - nWidth = bPercentWidth ? static_cast<sal_uInt16>((nWidth*nBrowseWidth) / 100) - : ToTwips( static_cast<sal_uInt16>(nBrowseWidth) ); + nWidth = bPercentWidth ?
o3tl::narrowing<sal_uInt16>((nWidth*nBrowseWidth) / 100) + : ToTwips( o3tl::narrowing<sal_uInt16>(nBrowseWidth) ); if( nWidth < MINLAY ) nWidth = MINLAY; const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr; if (pColl) { - SvxLRSpaceItem aLRItem( pColl->GetLRSpace() ); tools::Long nDist = nBrowseWidth - nWidth; + ::std::optional<SvxTextLeftMarginItem> oLeft; + ::std::optional<SvxRightMarginItem> oRight; switch( eAdjust ) { case SvxAdjust::Right: - aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) ); + oLeft.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_TEXTLEFT); break; case SvxAdjust::Left: - aLRItem.SetRight( static_cast<sal_uInt16>(nDist) ); + oRight.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_RIGHT); break; case SvxAdjust::Center: default: nDist /= 2; - aLRItem.SetTextLeft( static_cast<sal_uInt16>(nDist) ); - aLRItem.SetRight( static_cast<sal_uInt16>(nDist) ); + oLeft.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_TEXTLEFT); + oRight.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_RIGHT); break; } - HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aLRItem, nullptr, std::shared_ptr<HTMLAttrTable>()); - m_aSetAttrTab.push_back( pTmp ); + if (oLeft) + { + HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), *oLeft, nullptr, std::shared_ptr<HTMLAttrTable>()); + m_aSetAttrTab.push_back( pTmp ); + } + if (oRight) + { + HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), *oRight, nullptr, std::shared_ptr<HTMLAttrTable>()); + m_aSetAttrTab.push_back( pTmp ); + } } } } @@ -5440,21 +5449,21 @@ void SwHTMLParser::ParseMoreMetaOptions() return; } - OUStringBuffer sText; - sText.append("HTML: <"); - sText.append(OOO_STRING_SVTOOLS_HTML_meta); - sText.append(' '); + OUStringBuffer sText( + "HTML: <" + OOO_STRING_SVTOOLS_HTML_meta + " "); if( bHTTPEquiv ) sText.append(OOO_STRING_SVTOOLS_HTML_O_httpequiv); else sText.append(OOO_STRING_SVTOOLS_HTML_O_name); - sText.append("=\""); - 
sText.append(aName); - sText.append("\" "); - sText.append(OOO_STRING_SVTOOLS_HTML_O_content); - sText.append("=\""); - sText.append(aContent); - sText.append("\">"); + sText.append( + "=\"" + aName + + "\" " + OOO_STRING_SVTOOLS_HTML_O_content + "=\"" + + aContent + + "\">"); SwPostItField aPostItField( static_cast<SwPostItFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Postit )), @@ -5464,24 +5473,24 @@ void SwHTMLParser::ParseMoreMetaOptions() } HTMLAttr::HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem, - HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab ) : - m_nStartPara( rPos.nNode ), - m_nEndPara( rPos.nNode ), - m_nStartContent( rPos.nContent.GetIndex() ), - m_nEndContent(rPos.nContent.GetIndex() ), + HTMLAttr **ppHd, std::shared_ptr<HTMLAttrTable> xAttrTab ) : + m_nStartPara( rPos.GetNode() ), + m_nEndPara( rPos.GetNode() ), + m_nStartContent( rPos.GetContentIndex() ), + m_nEndContent(rPos.GetContentIndex() ), m_bInsAtStart( true ), m_bLikePara( false ), m_bValid( true ), m_pItem( rItem.Clone() ), - m_xAttrTab( rAttrTab ), + m_xAttrTab(std::move( xAttrTab )), m_pNext( nullptr ), m_pPrev( nullptr ), m_ppHead( ppHd ) { } -HTMLAttr::HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara, - sal_Int32 nEndCnt, HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab ) : +HTMLAttr::HTMLAttr( const HTMLAttr &rAttr, const SwNode &rEndPara, + sal_Int32 nEndCnt, HTMLAttr **ppHd, std::shared_ptr<HTMLAttrTable> xAttrTab ) : m_nStartPara( rAttr.m_nStartPara ), m_nEndPara( rEndPara ), m_nStartContent( rAttr.m_nStartContent ), @@ -5490,7 +5499,7 @@ HTMLAttr::HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara, m_bLikePara( rAttr.m_bLikePara ), m_bValid( rAttr.m_bValid ), m_pItem( rAttr.m_pItem->Clone() ), - m_xAttrTab( rAttrTab ), + m_xAttrTab(std::move( xAttrTab )), m_pNext( nullptr ), m_pPrev( nullptr ), m_ppHead( ppHd ) @@ -5501,7 +5510,7 @@ HTMLAttr::~HTMLAttr() { } -HTMLAttr 
*HTMLAttr::Clone(const SwNodeIndex& rEndPara, sal_Int32 nEndCnt) const +HTMLAttr *HTMLAttr::Clone(const SwNode& rEndPara, sal_Int32 nEndCnt) const { // create the attribute anew with old start position HTMLAttr *pNew = new HTMLAttr( *this, rEndPara, nEndCnt, m_ppHead, m_xAttrTab ); @@ -5512,7 +5521,7 @@ HTMLAttr *HTMLAttr::Clone(const SwNodeIndex& rEndPara, sal_Int32 nEndCnt) const return pNew; } -void HTMLAttr::Reset(const SwNodeIndex& rSttPara, sal_Int32 nSttCnt, +void HTMLAttr::Reset(const SwNode& rSttPara, sal_Int32 nSttCnt, HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab) { // reset the start (and the end) @@ -5581,16 +5590,12 @@ void HTMLReader::SetupFilterOptions() if (!m_pMedium) return; - const SfxItemSet* pItemSet = m_pMedium->GetItemSet(); - if (!pItemSet) - return; - - auto pItem = pItemSet->GetItem<SfxStringItem>(SID_FILE_FILTEROPTIONS); + auto pItem = m_pMedium->GetItemSet().GetItem(SID_FILE_FILTEROPTIONS); if (!pItem) return; OUString aFilterOptions = pItem->GetValue(); - static const OUStringLiteral aXhtmlNsKey(u"xhtmlns="); + static constexpr OUString aXhtmlNsKey(u"xhtmlns="_ustr); if (aFilterOptions.startsWith(aXhtmlNsKey)) { OUString aNamespace = aFilterOptions.copy(aXhtmlNsKey.getLength()); @@ -5613,8 +5618,8 @@ namespace bool TestImportHTML(SvStream &rStream) { FontCacheGuard aFontCacheGuard; - std::unique_ptr<Reader> xReader(new HTMLReader); - xReader->m_pStream = &rStream; + HTMLReader aReader; + aReader.m_pStream = &rStream; SwGlobals::ensure(); @@ -5622,13 +5627,12 @@ bool TestImportHTML(SvStream &rStream) xDocSh->DoInitNew(); SwDoc *pD = static_cast<SwDocShell*>((&xDocSh))->GetDoc(); - SwNodeIndex aIdx(pD->GetNodes().GetEndOfContent(), -1); - SwPaM aPaM(aIdx); + SwPaM aPaM(pD->GetNodes().GetEndOfContent(), SwNodeOffset(-1)); pD->SetInReading(true); bool bRet = false; try { - bRet = xReader->Read(*pD, OUString(), aPaM, OUString()) == ERRCODE_NONE; + bRet = aReader.Read(*pD, OUString(), aPaM, OUString()) == ERRCODE_NONE; } 
catch (const std::runtime_error&) { |