/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #ifndef INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX #define INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX #include #include #include #include #include #include #include #include #include #include #include #include #include class SfxMedium; class SfxViewFrame; class SdrObject; class SvxMacroTableDtor; class SwDoc; class SwPaM; class SwViewShell; class SwStartNode; class SwFormatColl; class SwField; class SwHTMLForm_Impl; class SwApplet_Impl; struct SwHTMLFootEndNote_Impl; class HTMLTableCnts; struct SwPendingStack; class SvxCSS1PropertyInfo; struct ImplSVEvent; #define HTML_PARSPACE (MM50) #define HTML_CJK_PARSPACE (MM50/2) #define HTML_CTL_PARSPACE (MM50/2) #define HTML_DFLT_IMG_WIDTH (MM50*4) #define HTML_DFLT_IMG_HEIGHT (MM50*2) // some things you often need extern HTMLOptionEnum aHTMLPAlignTable[]; extern HTMLOptionEnum aHTMLImgHAlignTable[]; extern HTMLOptionEnum aHTMLImgVAlignTable[]; // attribute stack: class HTMLAttr; typedef std::deque HTMLAttrs; class HTMLAttr { friend class SwHTMLParser; friend class CellSaveStruct; SwNodeIndex nSttPara, nEndPara; sal_Int32 nSttContent, nEndContent; bool bInsAtStart : 1; bool 
bLikePara : 1; // set attribute above the whole paragraph bool bValid : 1; // is the attribute valid? std::unique_ptr pItem; HTMLAttr *pNext; // still to close attributes with different values HTMLAttr *pPrev; // already closed but not set attributes HTMLAttr **ppHead; // list head HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem, HTMLAttr **pHd=nullptr ); HTMLAttr( const HTMLAttr &rAttr, const SwNodeIndex &rEndPara, sal_Int32 nEndCnt, HTMLAttr **pHd ); public: ~HTMLAttr(); HTMLAttr *Clone( const SwNodeIndex& rEndPara, sal_Int32 nEndCnt ) const; void Reset( const SwNodeIndex& rSttPara, sal_Int32 nSttCnt, HTMLAttr **pHd ); inline void SetStart( const SwPosition& rPos ); sal_uInt32 GetSttParaIdx() const { return nSttPara.GetIndex(); } sal_uInt32 GetEndParaIdx() const { return nEndPara.GetIndex(); } const SwNodeIndex& GetSttPara() const { return nSttPara; } const SwNodeIndex& GetEndPara() const { return nEndPara; } sal_Int32 GetSttCnt() const { return nSttContent; } sal_Int32 GetEndCnt() const { return nEndContent; } bool IsLikePara() const { return bLikePara; } void SetLikePara() { bLikePara = true; } SfxPoolItem& GetItem() { return *pItem; } const SfxPoolItem& GetItem() const { return *pItem; } HTMLAttr *GetNext() const { return pNext; } void InsertNext( HTMLAttr *pNxt ) { pNext = pNxt; } HTMLAttr *GetPrev() const { return pPrev; } void InsertPrev( HTMLAttr *pPrv ); void ClearPrev() { pPrev = nullptr; } void SetHead( HTMLAttr **ppHd ) { ppHead = ppHd; } // During setting attributes from styles it can happen that these // shouldn't be set anymore. To delete them would be very expensive, because // you don't know all the places where they are linked in. Therefore they're // made invalid and deleted at the next call of SetAttr_(). void Invalidate() { bValid = false; } }; // Table of attributes: The order here is important: The attributes in the // beginning of the table will set first in EndAllAttrs. 
struct HTMLAttrTable { HTMLAttr *pKeep, // frame attributes *pBox, *pBrush, *pBreak, *pPageDesc, *pLRSpace, // paragraph attributes *pULSpace, *pLineSpacing, *pAdjust, *pDropCap, *pSplit, *pWidows, *pOrphans, *pDirection, *pCharFormats, // text attributes *pINetFormat, *pBold, // character attributes *pBoldCJK, *pBoldCTL, *pItalic, *pItalicCJK, *pItalicCTL, *pStrike, *pUnderline, *pBlink, *pFont, *pFontCJK, *pFontCTL, *pFontHeight, *pFontHeightCJK, *pFontHeightCTL, *pFontColor, *pEscapement, *pCaseMap, *pKerning, // (only for SPACER) *pCharBrush, // character background *pLanguage, *pLanguageCJK, *pLanguageCTL, *pCharBox ; }; class HTMLAttrContext_SaveDoc; enum SwHTMLAppendMode { AM_NORMAL, // no paragraph spacing handling AM_NOSPACE, // set spacing hard to 0cm AM_SPACE, // set spacing hard to 0.5cm AM_SOFTNOSPACE, // don't set spacing, but save 0cm AM_NONE // no append }; class HTMLAttrContext { HTMLAttrs aAttrs; // the attributes created in the context OUString aClass; // context class HTMLAttrContext_SaveDoc *pSaveDocContext; std::unique_ptr pFrameItemSet; HtmlTokenId nToken; // the token of the context sal_uInt16 nTextFormatColl; // a style created in the context or zero sal_uInt16 nLeftMargin; // a changed left border sal_uInt16 nRightMargin; // a changed right border sal_uInt16 nFirstLineIndent; // a changed first line indent sal_uInt16 nUpperSpace; sal_uInt16 nLowerSpace; SwHTMLAppendMode eAppend; bool bLRSpaceChanged : 1; // left/right border, changed indent? bool bULSpaceChanged : 1; // top/bottom border changed? 
bool bDfltTextFormatColl : 1;// nTextFormatColl is only default bool bSpansSection : 1; // the context opens a SwSection bool bPopStack : 1; // delete above stack elements bool bFinishPREListingXMP : 1; bool bRestartPRE : 1; bool bRestartXMP : 1; bool bRestartListing : 1; bool bHeaderOrFooter : 1; public: void ClearSaveDocContext(); HTMLAttrContext( HtmlTokenId nTokn, sal_uInt16 nPoolId, const OUString& rClass, bool bDfltColl=false ) : aClass( rClass ), pSaveDocContext( nullptr ), nToken( nTokn ), nTextFormatColl( nPoolId ), nLeftMargin( 0 ), nRightMargin( 0 ), nFirstLineIndent( 0 ), nUpperSpace( 0 ), nLowerSpace( 0 ), eAppend( AM_NONE ), bLRSpaceChanged( false ), bULSpaceChanged( false ), bDfltTextFormatColl( bDfltColl ), bSpansSection( false ), bPopStack( false ), bFinishPREListingXMP( false ), bRestartPRE( false ), bRestartXMP( false ), bRestartListing( false ), bHeaderOrFooter( false ) {} explicit HTMLAttrContext( HtmlTokenId nTokn ) : pSaveDocContext( nullptr ), nToken( nTokn ), nTextFormatColl( 0 ), nLeftMargin( 0 ), nRightMargin( 0 ), nFirstLineIndent( 0 ), nUpperSpace( 0 ), nLowerSpace( 0 ), eAppend( AM_NONE ), bLRSpaceChanged( false ), bULSpaceChanged( false ), bDfltTextFormatColl( false ), bSpansSection( false ), bPopStack( false ), bFinishPREListingXMP( false ), bRestartPRE( false ), bRestartXMP( false ), bRestartListing( false ), bHeaderOrFooter( false ) {} ~HTMLAttrContext() { ClearSaveDocContext(); } HtmlTokenId GetToken() const { return nToken; } sal_uInt16 GetTextFormatColl() const { return bDfltTextFormatColl ? 0 : nTextFormatColl; } sal_uInt16 GetDfltTextFormatColl() const { return bDfltTextFormatColl ? 
nTextFormatColl : 0; } const OUString& GetClass() const { return aClass; } inline void SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight, short nIndent ); bool IsLRSpaceChanged() const { return bLRSpaceChanged; } inline void GetMargins( sal_uInt16& nLeft, sal_uInt16& nRight, short &nIndent ) const; inline void SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower ); bool IsULSpaceChanged() const { return bULSpaceChanged; } inline void GetULSpace( sal_uInt16& rUpper, sal_uInt16& rLower ) const; bool HasAttrs() const { return !aAttrs.empty(); } const HTMLAttrs& GetAttrs() const { return aAttrs; } HTMLAttrs& GetAttrs() { return aAttrs; } void SetSpansSection( bool bSet ) { bSpansSection = bSet; } bool GetSpansSection() const { return bSpansSection; } void SetPopStack( bool bSet ) { bPopStack = bSet; } bool GetPopStack() const { return bPopStack; } bool HasSaveDocContext() const { return pSaveDocContext!=nullptr; } HTMLAttrContext_SaveDoc *GetSaveDocContext( bool bCreate=false ); const SfxItemSet *GetFrameItemSet() const { return pFrameItemSet.get(); } SfxItemSet *GetFrameItemSet( SwDoc *pCreateDoc ); void SetFinishPREListingXMP( bool bSet ) { bFinishPREListingXMP = bSet; } bool IsFinishPREListingXMP() const { return bFinishPREListingXMP; } void SetRestartPRE( bool bSet ) { bRestartPRE = bSet; } bool IsRestartPRE() const { return bRestartPRE; } void SetRestartXMP( bool bSet ) { bRestartXMP = bSet; } bool IsRestartXMP() const { return bRestartXMP; } void SetRestartListing( bool bSet ) { bRestartListing = bSet; } bool IsRestartListing() const { return bRestartListing; } void SetHeaderOrFooter( bool bSet ) { bHeaderOrFooter = bSet; } bool IsHeaderOrFooter() const { return bHeaderOrFooter; } void SetAppendMode( SwHTMLAppendMode eMode ) { eAppend = eMode; } SwHTMLAppendMode GetAppendMode() const { return eAppend; } }; typedef std::vector HTMLAttrContexts; class HTMLTable; class SwCSS1Parser; class SwHTMLNumRuleInfo; typedef std::vector> ImageMaps; enum class HtmlContextFlags { 
ProtectStack = 0x0001, StripPara = 0x0002, KeepNumrule = 0x0004, HeaderDist = 0x0008, FooterDist = 0x0010, KeepAttrs = 0x0020, MultiColMask = StripPara | KeepNumrule | KeepAttrs // for headers, footers or footnotes }; namespace o3tl { template<> struct typed_flags : is_typed_flags {}; } enum class HtmlFrameFormatFlags { Box = 0x0001, Background = 0x0002, Padding = 0x0004, Direction = 0x0008, }; namespace o3tl { template<> struct typed_flags : is_typed_flags {}; } class SwHTMLParser : public SfxHTMLParser, public SwClient { friend class SectionSaveStruct; friend class CellSaveStruct; friend class CaptionSaveStruct; OUString m_aPathToFile; OUString m_sBaseURL; OUString m_aBasicLib; OUString m_aBasicModule; OUString m_aScriptSource; // content of the current script block OUString m_aScriptType; // type of read script (StarBasic/VB/JAVA) OUString m_aScriptURL; // script URL OUString m_aStyleSource; // content of current style sheet OUString m_aContents; // text of current marquee, field and so OUString m_sTitle; OUString m_aUnknownToken; // a started unknown token OUString m_aBulletGrfs[MAXLEVEL]; OUString m_sJmpMark; std::vector m_aBaseFontStack; // stack for // Bit 0-2: font size (1-7) std::vector m_aFontStack; // stack for , , // Bit 0-2: font size (1-7) // Bit 15: font colour was set HTMLAttrs m_aSetAttrTab;// "closed", not set attributes HTMLAttrs m_aParaAttrs; // temporary paragraph attributes HTMLAttrTable m_aAttrTab; // "open" attributes HTMLAttrContexts m_aContexts;// the current context of attribute/token std::vector m_aMoveFlyFrames;// Fly-Frames, the anchor is moved std::deque m_aMoveFlyCnts;// and the Content-Positions SwApplet_Impl *m_pAppletImpl; // current applet SwCSS1Parser *m_pCSS1Parser; // Style-Sheet-Parser SwHTMLNumRuleInfo *m_pNumRuleInfo; SwPendingStack *m_pPendStack; rtl::Reference m_xDoc; SwPaM *m_pPam; // SwPosition should be enough, or ?? 
SwViewShell *m_pActionViewShell; // SwViewShell, where StartAction was called SwNodeIndex *m_pSttNdIdx; HTMLTable *m_pTable; // current "outermost" table std::vector m_aTables; SwHTMLForm_Impl *m_pFormImpl; // current form SdrObject *m_pMarquee; // current marquee SwField *m_pField; // current field ImageMap *m_pImageMap; // current image map ImageMaps *m_pImageMaps; ///< all Image-Maps that have been read SwHTMLFootEndNote_Impl *m_pFootEndNoteImpl; Size m_aHTMLPageSize; // page size of HTML template sal_uInt32 m_aFontHeights[7]; // font heights 1-7 ImplSVEvent * m_nEventId; sal_uInt16 m_nBaseFontStMin; sal_uInt16 m_nFontStMin; sal_uInt16 m_nDefListDeep; sal_uInt16 m_nFontStHeadStart; // elements in font stack at sal_uInt16 m_nSBModuleCnt; // counter for basic modules sal_uInt16 m_nMissingImgMaps; // How many image maps are still missing? size_t m_nParaCnt; size_t m_nContextStMin; // lower limit of PopContext size_t m_nContextStAttrMin; // lower limit of attributes sal_uInt16 m_nSelectEntryCnt; // Number of entries in the actual listbox HtmlTokenId m_nOpenParaToken; // opened paragraph element enum JumpToMarks { JUMPTO_NONE, JUMPTO_MARK, JUMPTO_TABLE, JUMPTO_FRAME, JUMPTO_REGION, JUMPTO_GRAPHIC } m_eJumpTo; #ifdef DBG_UTIL sal_uInt16 m_nContinue; // depth of Continue calls #endif SvxAdjust m_eParaAdjust; // adjustment of current paragraph HTMLScriptLanguage m_eScriptLang; // current script language bool m_bOldIsHTMLMode : 1; // Was it a HTML document? bool m_bDocInitalized : 1; // document resp. 
shell was initialize // flag to prevent double init via recursion bool m_bViewCreated : 1; // the view was already created (asynchronous) bool m_bSetModEnabled : 1; bool m_bInFloatingFrame : 1; // We are in a floating frame bool m_bInField : 1; bool m_bKeepUnknown : 1; // handle unknown/not supported tokens // 8 bool m_bCallNextToken : 1; // In tables: call NextToken in any case bool m_bIgnoreRawData : 1; // ignore content of script/style bool m_bLBEntrySelected : 1; // Is the current option selected? bool m_bTAIgnoreNewPara : 1; // ignore next LF in text area? bool m_bFixMarqueeWidth : 1; // Change size of marquee? bool m_bUpperSpace : 1; // top paragraph spacing is needed bool m_bNoParSpace : 1; // 16 bool m_bInNoEmbed : 1; // we are in a NOEMBED area bool m_bInTitle : 1; // we are in title bool m_bChkJumpMark : 1; // maybe jump to predetermined mark bool m_bUpdateDocStat : 1; bool m_bFixSelectWidth : 1; // Set new width of select? bool m_bTextArea : 1; // 24 bool m_bSelect : 1; bool m_bInFootEndNoteAnchor : 1; bool m_bInFootEndNoteSymbol : 1; bool m_bIgnoreHTMLComments : 1; bool m_bRemoveHidden : 1; // the filter implementation might set the hidden flag bool m_bBodySeen : 1; bool m_bReadingHeaderOrFooter : 1; bool m_isInTableStructure; /// the names corresponding to the DOCINFO field subtypes INFO[1-4] OUString m_InfoNames[4]; SfxViewFrame* m_pTempViewFrame; void DeleteFormImpl(); void DocumentDetected(); void Show(); void ShowStatline(); SwViewShell *CallStartAction( SwViewShell *pVSh = nullptr, bool bChkPtr = true ); SwViewShell *CallEndAction( bool bChkAction = false, bool bChkPtr = true ); SwViewShell *CheckActionViewShell(); DECL_LINK( AsyncCallback, void*, void ); // set attribute on document void SetAttr_( bool bChkEnd, bool bBeforeTable, HTMLAttrs *pPostIts ); void SetAttr( bool bChkEnd = true, bool bBeforeTable = false, HTMLAttrs *pPostIts = nullptr ) { if( !m_aSetAttrTab.empty() || !m_aMoveFlyFrames.empty() ) SetAttr_( bChkEnd, bBeforeTable, pPostIts 
); } HTMLAttr **GetAttrTabEntry( sal_uInt16 nWhich ); // create a new text node on PaM position bool AppendTextNode( SwHTMLAppendMode eMode=AM_NORMAL, bool bUpdateNum=true ); void AddParSpace(); // start/end an attribute // ppDepAttr indicated an attribute table entry, which attribute has to be // set, before the attribute is closed void NewAttr( HTMLAttr **ppAttr, const SfxPoolItem& rItem ); bool EndAttr( HTMLAttr *pAttr, bool bChkEmpty=true ); void DeleteAttr( HTMLAttr* pAttr ); void EndContextAttrs( HTMLAttrContext *pContext ); void SaveAttrTab( HTMLAttrTable& rNewAttrTab ); void SplitAttrTab( const SwPosition& rNewPos ); void SplitAttrTab( HTMLAttrTable& rNewAttrTab, bool bMoveEndBack ); void RestoreAttrTab( HTMLAttrTable& rNewAttrTab ); void InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart ); void InsertAttrs( HTMLAttrs& rAttrs ); bool DoPositioning( SfxItemSet &rItemSet, SvxCSS1PropertyInfo &rPropInfo, HTMLAttrContext *pContext ); bool CreateContainer( const OUString& rClass, SfxItemSet &rItemSet, SvxCSS1PropertyInfo &rPropInfo, HTMLAttrContext *pContext ); bool EndSection( bool bLFStripped=false ); void InsertAttrs( SfxItemSet &rItemSet, SvxCSS1PropertyInfo const &rPropInfo, HTMLAttrContext *pContext, bool bCharLvl=false ); void InsertAttr( HTMLAttr **ppAttr, const SfxPoolItem & rItem, HTMLAttrContext *pCntxt ); void SplitPREListingXMP( HTMLAttrContext *pCntxt ); void FixHeaderFooterDistance( bool bHeader, const SwPosition *pOldPos ); void EndContext( HTMLAttrContext *pContext ); void ClearContext( HTMLAttrContext *pContext ); const SwFormatColl *GetCurrFormatColl() const; SwTwips GetCurrentBrowseWidth(); SwHTMLNumRuleInfo& GetNumInfo() { return *m_pNumRuleInfo; } // add parameter void SetNodeNum( sal_uInt8 nLevel ); // Manage paragraph styles // set the style resp. 
its attributes on the stack void SetTextCollAttrs( HTMLAttrContext *pContext = nullptr ); void InsertParaAttrs( const SfxItemSet& rItemSet ); // Manage attribute context // save current context inline void PushContext( HTMLAttrContext *pCntxt ); // Fetch top/specified context but not outside the context with token // nLimit. If bRemove set then remove it. HTMLAttrContext *PopContext( HtmlTokenId nToken = HtmlTokenId::NONE ); bool GetMarginsFromContext( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent, bool bIgnoreCurrent=false ) const; void GetMarginsFromContextWithNumBul( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent ) const; void GetULSpaceFromContext( sal_uInt16 &rUpper, sal_uInt16 &rLower ) const; void MovePageDescAttrs( SwNode *pSrcNd, sal_uLong nDestIdx, bool bFormatBreak ); // Handling of tags at paragraph level //

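    // <P> and <H1> to <H6>
    // NOTE(review): tag names were missing from this comment and are reconstructed -- verify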

void NewPara(); void EndPara( bool bReal = false ); void NewHeading( HtmlTokenId nToken ); void EndHeading(); //
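    // <ADDRESS>, <BLOCKQUOTE> and <PRE>
    // NOTE(review): tag names were missing from this comment and are reconstructed -- verify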
    void NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nPoolId );
    void EndTextFormatColl( HtmlTokenId nToken );

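    // <DIV> and <CENTER>
    // NOTE(review): tag names were missing from this comment and are reconstructed -- verify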
void NewDivision( HtmlTokenId nToken ); void EndDivision(); // insert/close Fly-Frames void InsertFlyFrame( const SfxItemSet& rItemSet, HTMLAttrContext *pCntxt, const OUString& rId ); void SaveDocContext( HTMLAttrContext *pCntxt, HtmlContextFlags nFlags, const SwPosition *pNewPos ); void RestoreDocContext( HTMLAttrContext *pCntxt ); // end all opened
areas bool EndSections( bool bLFStripped ); // void NewMultiCol( sal_uInt16 columnsFromCss=0 ); // void NewMarquee( HTMLTable *pCurTable=nullptr ); void EndMarquee(); void InsertMarqueeText(); // Handling of lists // order list
    and unordered list