summary | refs | log | tree | commit | diff
path: root/sw
diff options
context:
space:
mode:
author: Miklos Vajna <vmiklos@collabora.co.uk> 2014-08-27 15:24:37 +0200
committer: Tomaž Vajngerl <quikee@gmail.com> 2014-08-27 11:09:16 -0500
commit: 2c7333edf0b0fef2ee4f6757012196cf99a83ca5 (patch)
tree: b8f1e36db57962b367db29584e2de9de3a82468f /sw
parent: f29c5f742e8be13b5ee7d03ebf1bcaf2a4adfef9 (diff)
DOCX import: fix handling of embedded DOCX files
The problem was that SwXTextEmbeddedObject::getEmbeddedObject() returned an empty reference for those embedded objects, so the HTML filter couldn't extract their content when it wanted to do so.

It turns out the reason for this was that the DOCX importer only handled the replacement image + raw native data for the object. Fix this by creating the embedded object with the correct CLSID and importing the raw data into the empty embedded document model.

This is similar to what is done for XLSX-in-PPTX in oox::drawingml::ShapeExport::WriteOLE2Shape(), just for the import part.

(cherry picked from commit 41aa970b3120837ca9cadb12997a53ad322145a4)

Conflicts:
	writerfilter/source/dmapper/OLEHandler.cxx
	writerfilter/source/dmapper/OLEHandler.hxx

Also squash in 3 fixes on top of that in the HTML filter which are relevant only after the embedded DOCX import is fixed:

1) sw HTML export: don't crash on embedded object without native data

(cherry picked from commit 35e954149d976e0e69a9f8b382fc67f5abc5eaa7)

2) sw HTML export: handle embedded text documents

(cherry picked from commit 8d670c9452e930460be600212d73e22acd61cc0f)

3) sw HTML export: avoid invalid output for embedded documents

When an sw document is embedded inside an sw one, then the inner sw HTML export should just write what's inside the <body>. Add a filter option for that in sw, similar to the one already existing in sc.

(cherry picked from commit 8760b2b05c23e5fdf8ad7ad3f447a7489c2ddeac)

Conflicts:
	sw/source/filter/html/wrthtml.cxx

Change-Id: Id2cef966c1022ba45a3540bff234029c1d396778
Reviewed-on: https://gerrit.libreoffice.org/11151
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Tested-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'sw')
-rw-r--r-- sw/qa/extras/htmlexport/data/skipimage-embedded-document.docx | bin 0 -> 21379 bytes
-rw-r--r-- sw/qa/extras/htmlexport/htmlexport.cxx | 13
-rw-r--r-- sw/source/filter/html/htmlplug.cxx | 12
-rw-r--r-- sw/source/filter/html/wrthtml.cxx | 20
-rw-r--r-- sw/source/filter/html/wrthtml.hxx | 2
5 files changed, 41 insertions, 6 deletions
diff --git a/sw/qa/extras/htmlexport/data/skipimage-embedded-document.docx b/sw/qa/extras/htmlexport/data/skipimage-embedded-document.docx
new file mode 100644
index 000000000000..f48935ad4490
--- /dev/null
+++ b/sw/qa/extras/htmlexport/data/skipimage-embedded-document.docx
Binary files differ
diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx
index fc7f68925718..94fb66727971 100644
--- a/sw/qa/extras/htmlexport/htmlexport.cxx
+++ b/sw/qa/extras/htmlexport/htmlexport.cxx
@@ -149,6 +149,19 @@ DECLARE_HTMLEXPORT_TEST(testSkipImageEmbedded, "skipimage-embedded.doc")
assertXPath(pDoc, "//span/table", 1);
}
+DECLARE_HTMLEXPORT_TEST(testSkipImageEmbeddedDocument, "skipimage-embedded-document.docx")
+{
+ // Same scenario as testSkipImageEmbedded, except that the embedded object is a
+ // Writer document (not a Calc one) and the container format is OOXML (not WW8).
+ htmlDocPtr pDoc = parseHtml(maTempFile);
+ CPPUNIT_ASSERT(pDoc);
+
+ // This was 2: the HTML header (with its generator <meta>) was in the document two times.
+ assertXPath(pDoc, "//meta[@name='generator']", 1);
+ // The text of the embedded document was missing from the exported HTML.
+ assertXPathContent(pDoc, "/html/body/p/span/p/span", "Inner.");
+}
+
#endif
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sw/source/filter/html/htmlplug.cxx b/sw/source/filter/html/htmlplug.cxx
index 489d28fd83d9..8f8f3019362c 100644
--- a/sw/source/filter/html/htmlplug.cxx
+++ b/sw/source/filter/html/htmlplug.cxx
@@ -1270,14 +1270,20 @@ Writer& OutHTML_FrmFmtOLENodeGrf( Writer& rWrt, const SwFrmFmt& rFrmFmt,
uno::Reference<text::XTextContent> xTextContent(xShape, uno::UNO_QUERY);
uno::Reference<document::XEmbeddedObjectSupplier2> xEmbeddedObjectSupplier(xTextContent, uno::UNO_QUERY);
uno::Reference<frame::XStorable> xStorable(xEmbeddedObjectSupplier->getEmbeddedObject(), uno::UNO_QUERY);
+ SAL_WARN_IF(!xStorable.is(), "sw.html", "OutHTML_FrmFmtOLENodeGrf: no embedded object");
// Figure out what is the filter name of the embedded object.
uno::Reference<lang::XServiceInfo> xServiceInfo(xStorable, uno::UNO_QUERY);
OUString aFilter;
- if (xServiceInfo->supportsService("com.sun.star.sheet.SpreadsheetDocument"))
- aFilter = "HTML (StarCalc)";
+ if (xServiceInfo.is())
+ {
+ if (xServiceInfo->supportsService("com.sun.star.sheet.SpreadsheetDocument"))
+ aFilter = "HTML (StarCalc)";
+ else if (xServiceInfo->supportsService("com.sun.star.text.TextDocument"))
+ aFilter = "HTML (StarWriter)";
+ }
- if (!aFilter.isEmpty())
+ if (xStorable.is() && !aFilter.isEmpty())
{
SvMemoryStream aStream;
uno::Reference<io::XOutputStream> xOutputStream(new utl::OStreamWrapper(aStream));
diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx
index 100518fa03c9..b9464f670949 100644
--- a/sw/source/filter/html/wrthtml.cxx
+++ b/sw/source/filter/html/wrthtml.cxx
@@ -112,6 +112,7 @@ SwHTMLWriter::SwHTMLWriter( const OUString& rBaseURL )
, bPreserveForm( false )
, bCfgNetscape4( false )
, mbSkipImages(false)
+ , mbSkipHeaderFooter(false)
{
SetBaseURL( rBaseURL );
@@ -150,6 +151,10 @@ void SwHTMLWriter::SetupFilterOptions(SfxMedium& rMedium)
{
mbSkipImages = true;
}
+ else if (sFilterOptions == "SkipHeaderFooter")
+ {
+ mbSkipHeaderFooter = true;
+ }
}
sal_uLong SwHTMLWriter::WriteStream()
@@ -391,9 +396,12 @@ sal_uLong SwHTMLWriter::WriteStream()
if( bLFPossible )
OutNewLine();
- HTMLOutFuncs::Out_AsciiTag( Strm(), OOO_STRING_SVTOOLS_HTML_body, false );
- OutNewLine();
- HTMLOutFuncs::Out_AsciiTag( Strm(), OOO_STRING_SVTOOLS_HTML_html, false );
+ if (!mbSkipHeaderFooter)
+ {
+ HTMLOutFuncs::Out_AsciiTag( Strm(), OOO_STRING_SVTOOLS_HTML_body, false );
+ OutNewLine();
+ HTMLOutFuncs::Out_AsciiTag( Strm(), OOO_STRING_SVTOOLS_HTML_html, false );
+ }
// loesche die Tabelle mit den freifliegenden Rahmen
sal_uInt16 i;
@@ -902,6 +910,8 @@ sal_uInt16 SwHTMLWriter::OutHeaderAttrs()
const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
{
OStringBuffer sOut;
+ if (!mbSkipHeaderFooter)
+ {
sOut.append(OString(OOO_STRING_SVTOOLS_HTML_doctype) + " " + OString(OOO_STRING_SVTOOLS_HTML_doctype40));
HTMLOutFuncs::Out_AsciiTag( Strm(), sOut.makeStringAndClear().getStr() );
@@ -935,6 +945,7 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
rHeaderAttrs = OutHeaderAttrs();
OutFootEndNoteInfo();
+ }
const SwPageDesc *pPageDesc = 0;
@@ -964,6 +975,8 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
if( !pPageDesc )
pPageDesc = &pDoc->GetPageDesc( 0 );
+ if (!mbSkipHeaderFooter)
+ {
// und nun ... das Style-Sheet!!!
if( bCfgOutStyles )
{
@@ -1015,6 +1028,7 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
OutBasicBodyEvents();
Strm().WriteChar( '>' );
+ }
return pPageDesc;
}
diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx
index 70b6f4aa41bf..3de8a593ee90 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -399,6 +399,8 @@ public:
bool bCfgNetscape4 : 1; // Netscape4 Hacks
bool mbSkipImages : 1;
+ /// If HTML header and footer should be written as well, or just the content itself.
+ bool mbSkipHeaderFooter : 1;
// 23