summaryrefslogtreecommitdiff
path: root/sw
diff options
context:
space:
mode:
authorTomaž Vajngerl <tomaz.vajngerl@collabora.co.uk>2021-05-31 22:55:02 +0900
committerTomaž Vajngerl <quikee@gmail.com>2021-08-30 09:18:46 +0200
commit4a04be1c48be3d253ccbed374a622f87f6bd9629 (patch)
tree9ec042d913fcfd917768e878870378beedb6c24e /sw
parent09fd7bba57a2e31b3d5df38fb2d2eef2aeae34c0 (diff)
indexing: start of Indexing Export impl. based on (X)HTML export
This adds an indexing output/export to the HTML export code, which outputs an XML document that will be used for indexing the document. It is based on the HTML export in order to reuse its traversal of the document model. It is enabled by setting the "IndexingOutput" export parameter. This commit only adds the groundwork; the output is still more or less the same as HTML, and follow-up commits will add more indexing-specific changes. The only change is that "indexing" is used as the top-level element and the document is valid XML (it has the XML header). Also add a basic test that the indexing output/export works and can be parsed as XML with an "indexing" top-level element. Change-Id: I153b1a70da7cbcf0d33b8610d962e6b7ae23ad23 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/116631 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl <quikee@gmail.com> (cherry picked from commit ed984f8e68996bcbdcf590814c41479ba76907f2) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/121096 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
Diffstat (limited to 'sw')
-rw-r--r--sw/CppunitTest_sw_indexingexport.mk64
-rw-r--r--sw/Module_sw.mk1
-rw-r--r--sw/qa/extras/htmlexport/IndexingExport.cxx44
-rw-r--r--sw/qa/extras/htmlexport/data/IndexingExport_VariousParagraphs.odtbin0 -> 11111 bytes
-rw-r--r--sw/source/filter/html/wrthtml.cxx29
-rw-r--r--sw/source/filter/html/wrthtml.hxx5
6 files changed, 140 insertions, 3 deletions
diff --git a/sw/CppunitTest_sw_indexingexport.mk b/sw/CppunitTest_sw_indexingexport.mk
new file mode 100644
index 000000000000..814e8d849dec
--- /dev/null
+++ b/sw/CppunitTest_sw_indexingexport.mk
@@ -0,0 +1,64 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#*************************************************************************
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+#*************************************************************************
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_indexingexport))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_indexingexport))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_indexingexport, \
+ sw/qa/extras/htmlexport/IndexingExport \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_indexingexport, \
+ comphelper \
+ cppu \
+ cppuhelper \
+ i18nlangtag \
+ msfilter \
+ sal \
+ sfx \
+ sot \
+ sw \
+ swqahelper \
+ svl \
+ svt \
+ test \
+ tl \
+ unotest \
+ utl \
+ vcl \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_indexingexport,\
+ boost_headers \
+ libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_indexingexport,\
+ -I$(SRCDIR)/sw/inc \
+ -I$(SRCDIR)/sw/source/core/inc \
+ -I$(SRCDIR)/sw/source/uibase/inc \
+ -I$(SRCDIR)/sw/qa/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_indexingexport,\
+ udkapi \
+ offapi \
+ oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sw_indexingexport))
+$(eval $(call gb_CppunitTest_use_vcl,sw_indexingexport))
+$(eval $(call gb_CppunitTest_use_rdb,sw_indexingexport,services))
+$(eval $(call gb_CppunitTest_use_configuration,sw_indexingexport))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index 26f8bd797206..cddc7cdc4f14 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -66,6 +66,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\
CppunitTest_sw_htmlexport \
CppunitTest_sw_xhtmlexport \
CppunitTest_sw_htmlimport \
+ CppunitTest_sw_indexingexport \
CppunitTest_sw_macros_test \
CppunitTest_sw_ooxmlexport \
CppunitTest_sw_ooxmlexport2 \
diff --git a/sw/qa/extras/htmlexport/IndexingExport.cxx b/sw/qa/extras/htmlexport/IndexingExport.cxx
new file mode 100644
index 000000000000..f10c6ec08470
--- /dev/null
+++ b/sw/qa/extras/htmlexport/IndexingExport.cxx
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <memory>
+#include <swmodeltestbase.hxx>
+
+#include <test/htmltesttools.hxx>
+
+class IndexingExportTest : public SwModelTestBase
+{
+public:
+ IndexingExportTest()
+ : SwModelTestBase("/sw/qa/extras/htmlexport/data/", "HTML (StarWriter)")
+ {
+ }
+
+private:
+ virtual std::unique_ptr<Resetter> preTest(const char*) override
+ {
+ setFilterOptions("IndexingOutput");
+ return nullptr;
+ }
+};
+
+#define DECLARE_INDEXINGEXPORT_TEST(TestName, filename) \
+ DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, IndexingExportTest)
+
+DECLARE_INDEXINGEXPORT_TEST(testIndexingSimpleParagraph, "IndexingExport_VariousParagraphs.odt")
+{
+ xmlDocUniquePtr pDoc = parseXml(maTempFile);
+ CPPUNIT_ASSERT(pDoc);
+
+ assertXPath(pDoc, "/indexing", 1);
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/qa/extras/htmlexport/data/IndexingExport_VariousParagraphs.odt b/sw/qa/extras/htmlexport/data/IndexingExport_VariousParagraphs.odt
new file mode 100644
index 000000000000..eb8904720f29
--- /dev/null
+++ b/sw/qa/extras/htmlexport/data/IndexingExport_VariousParagraphs.odt
Binary files differ
diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx
index 2071271e580c..1bffc2030e44 100644
--- a/sw/source/filter/html/wrthtml.cxx
+++ b/sw/source/filter/html/wrthtml.cxx
@@ -79,6 +79,7 @@
#include <xmloff/odffields.hxx>
#include <tools/urlobj.hxx>
#include <osl/file.hxx>
+#include <tools/stream.hxx>
#include <comphelper/scopeguard.hxx>
#include <unotools/tempfile.hxx>
#include <comphelper/sequenceashashmap.hxx>
@@ -150,6 +151,7 @@ SwHTMLWriter::SwHTMLWriter( const OUString& rBaseURL, const OUString& rFilterOpt
, mbSkipImages(false)
, mbSkipHeaderFooter(false)
, mbEmbedImages(false)
+ , mbIndexingOutput(false)
, m_bCfgPrintLayout( false )
, m_bParaDotLeaders( false )
{
@@ -218,6 +220,13 @@ void SwHTMLWriter::SetupFilterOptions(const OUString& rFilterOptions)
{
mbEmbedImages = true;
}
+ else if (rFilterOptions == "IndexingOutput")
+ {
+ mbIndexingOutput = true;
+ mbSkipHeaderFooter = true;
+ mbSkipImages = true;
+ mbXHTML = true;
+ }
const uno::Sequence<OUString> aOptionSeq = comphelper::string::convertCommaSeparated(rFilterOptions);
const OUString aXhtmlNsKey("xhtmlns=");
@@ -259,6 +268,8 @@ ErrCode SwHTMLWriter::WriteStream()
}
comphelper::ScopeGuard g([this, pOldPasteStream] { this->SetStream(pOldPasteStream); });
+ HtmlWriter aHtmlWriter(Strm(), GetNamespace());
+
SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
// font heights 1-7
@@ -443,7 +454,7 @@ ErrCode SwHTMLWriter::WriteStream()
CollectLinkTargets();
sal_uInt16 nHeaderAttrs = 0;
- m_pCurrPageDesc = MakeHeader( nHeaderAttrs );
+ m_pCurrPageDesc = MakeHeader(aHtmlWriter, nHeaderAttrs);
m_bLFPossible = true;
@@ -495,8 +506,14 @@ ErrCode SwHTMLWriter::WriteStream()
HTMLOutFuncs::Out_AsciiTag( Strm(), GetNamespace() + OOO_STRING_SVTOOLS_HTML_html, false );
}
else if (mbReqIF)
+ {
// ReqIF: end xhtml.BlkStruct.class.
HTMLOutFuncs::Out_AsciiTag(Strm(), GetNamespace() + OOO_STRING_SVTOOLS_HTML_division, false);
+ }
+ else if (mbIndexingOutput)
+ {
+ aHtmlWriter.end();
+ }
// delete the table with floating frames
OSL_ENSURE( !m_pHTMLPosFlyFrames, "Were not all frames output?" );
@@ -978,7 +995,7 @@ sal_uInt16 SwHTMLWriter::OutHeaderAttrs()
return nAttrs;
}
-const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
+const SwPageDesc* SwHTMLWriter::MakeHeader(HtmlWriter & rHtmlWriter, sal_uInt16 &rHeaderAttrs )
{
OStringBuffer sOut;
if (!mbSkipHeaderFooter)
@@ -1020,6 +1037,14 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
OutFootEndNoteInfo();
}
+ else if (mbIndexingOutput)
+ {
+ Strm().WriteCharPtr("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+ Strm().WriteCharPtr(SAL_NEWLINE_STRING);
+ rHtmlWriter.start("indexing");
+ rHtmlWriter.characters("");
+ Strm().WriteCharPtr(SAL_NEWLINE_STRING);
+ }
const SwPageDesc *pPageDesc = nullptr;
diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx
index f82325ee50ae..abe6968e8488 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -32,6 +32,7 @@
#include <o3tl/typed_flags_set.hxx>
#include <rtl/ref.hxx>
#include <svtools/htmlout.hxx>
+#include <svtools/HtmlWriter.hxx>
#include <tools/fldunit.hxx>
#include <shellio.hxx>
@@ -263,7 +264,7 @@ class SW_DLLPUBLIC SwHTMLWriter : public Writer
FieldUnit m_eCSS1Unit;
sal_uInt16 OutHeaderAttrs();
- const SwPageDesc *MakeHeader( sal_uInt16& rHeaderAtrs );
+ const SwPageDesc* MakeHeader(HtmlWriter & rXmlWriter, sal_uInt16& rHeaderAtrs);
void GetControls();
void AddLinkTarget( const OUString& rURL );
@@ -396,6 +397,8 @@ public:
OString maNamespace;
/// If the ReqIF subset of XHTML should be written.
bool mbReqIF = false;
+ /// Indexing output.
+ bool mbIndexingOutput : 1;
#define sCSS2_P_CLASS_leaders "leaders"
bool m_bCfgPrintLayout : 1; // PrintLayout option for TOC dot leaders