From 50f0e8c7880122a05585a2233f6f35d0dfee0385 Mon Sep 17 00:00:00 2001 From: Tomaž Vajngerl Date: Sun, 22 Aug 2021 11:43:10 +0900 Subject: indexing: make indexing XML flat and use simple element names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This changes the indexing XML to be flatter and changes the element names to be just either "paragraph" or "object", where an "object" then has an attribute that says exactly what type it is. This makes converting the XML to a format accepted by an indexing engine easier. Change-Id: Ia8941cc9616a862c1bc980efea5ba2548217644e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/120836 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl --- sw/qa/extras/indexing/IndexingExportTest.cxx | 167 +++++++++++++++------------ sw/source/filter/indexing/IndexingExport.cxx | 28 +++-- 2 files changed, 109 insertions(+), 86 deletions(-) diff --git a/sw/qa/extras/indexing/IndexingExportTest.cxx b/sw/qa/extras/indexing/IndexingExportTest.cxx index 346ff783d2c4..9d40d887f30d 100644 --- a/sw/qa/extras/indexing/IndexingExportTest.cxx +++ b/sw/qa/extras/indexing/IndexingExportTest.cxx @@ -107,10 +107,12 @@ void IndexingExportTest::testIndexingExport_Images() CPPUNIT_ASSERT(pXmlDoc); assertXPath(pXmlDoc, "/indexing"); - assertXPath(pXmlDoc, "/indexing/graphic[1]", "alt", "Image_NonCaption - Alternative text"); - assertXPath(pXmlDoc, "/indexing/graphic[1]", "name", "Image_NonCaption"); - assertXPath(pXmlDoc, "/indexing/graphic[2]", "alt", "Image_InCaption - Alternative text"); - assertXPath(pXmlDoc, "/indexing/graphic[2]", "name", "Image_InCaption"); + assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Image_NonCaption - Alternative text"); + assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Image_NonCaption"); + assertXPath(pXmlDoc, "/indexing/object[1]", "type", "graphic"); + assertXPath(pXmlDoc, "/indexing/object[2]", "alt", "Image_InCaption - Alternative text"); + assertXPath(pXmlDoc, "/indexing/object[2]", 
"name", "Image_InCaption"); + assertXPath(pXmlDoc, "/indexing/object[2]", "type", "graphic"); } void IndexingExportTest::testIndexingExport_OLE() @@ -127,8 +129,9 @@ void IndexingExportTest::testIndexingExport_OLE() CPPUNIT_ASSERT(pXmlDoc); assertXPath(pXmlDoc, "/indexing"); - assertXPath(pXmlDoc, "/indexing/ole[1]", "name", "Object - Chart"); - assertXPath(pXmlDoc, "/indexing/ole[1]", "alt", "Alt Text"); + assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Object - Chart"); + assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Alt Text"); + assertXPath(pXmlDoc, "/indexing/object[1]", "type", "ole"); } void IndexingExportTest::testIndexingExport_Shapes() @@ -145,18 +148,22 @@ void IndexingExportTest::testIndexingExport_Shapes() CPPUNIT_ASSERT(pXmlDoc); assertXPath(pXmlDoc, "/indexing"); - - assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Circle"); - assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "This is a circle"); - assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "This is a second paragraph"); - - assertXPath(pXmlDoc, "/indexing/shape[2]", "name", "Diamond"); - assertXPathContent(pXmlDoc, "/indexing/shape[2]/paragraph[1]", "This is a diamond"); - - assertXPath(pXmlDoc, "/indexing/shape[3]", "name", "Text Frame 1"); - assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[1]", "This is a TextBox - Para1"); - assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[2]", "Para2"); - assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[3]", "Para3"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Drawing : Just a Diamond"); + + assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Circle"); + assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a circle"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "This is a second paragraph"); + + assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Diamond"); + assertXPath(pXmlDoc, 
"/indexing/object[2]", "type", "shape"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a diamond"); + + assertXPath(pXmlDoc, "/indexing/object[3]", "name", "Text Frame 1"); + assertXPath(pXmlDoc, "/indexing/object[3]", "type", "shape"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "This is a TextBox - Para1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Para2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "Para3"); } void IndexingExportTest::testIndexingExport_Tables() @@ -174,51 +181,56 @@ void IndexingExportTest::testIndexingExport_Tables() assertXPath(pXmlDoc, "/indexing"); - assertXPath(pXmlDoc, "/indexing/table[1]", "name", "Table1"); - assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[1]", "A"); - assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[2]", "B"); - assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[3]", "1"); - assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[4]", "2"); - - assertXPath(pXmlDoc, "/indexing/table[2]", "name", "Table2"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[1]", "A"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[2]", "B"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[3]", "C"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[4]", "1"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[5]", "10"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[6]", "100"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[7]", "2"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[8]", "20"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[9]", "200"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[10]", "3"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[11]", "30"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[12]", "300"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[13]", "4"); - 
assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[14]", "40"); - assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[15]", "400"); - - assertXPath(pXmlDoc, "/indexing/table[3]", "name", "WeirdTable"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[1]", "A1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[2]", "B1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[3]", "C1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[4]", "D1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[5]", "A2B2"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[6]", "C2D2"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[7]", "A3B3C3D3"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[8]", "A4-1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[9]", "A4-2"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[10]", "B4-1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[11]", "C4-1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[12]", "D4-1"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[13]", "D4-2"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[14]", ""); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[15]", ""); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[16]", "B4-2"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[17]", "C4-2"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[18]", ""); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[19]", ""); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[20]", "A5B5C5"); - assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[21]", "D5"); + assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Table1"); + assertXPath(pXmlDoc, "/indexing/object[1]", "type", "table"); + assertXPath(pXmlDoc, "/indexing/object[1]", "index", "9"); + // Search paragraph with parent = 9 + 
assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][1]", "A"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][2]", "B"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][3]", "1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][4]", "2"); + + assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Table2"); + assertXPath(pXmlDoc, "/indexing/object[2]", "type", "table"); + assertXPath(pXmlDoc, "/indexing/object[2]", "index", "24"); + // Search paragraph with parent = 24 + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][1]", "A"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][2]", "B"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][3]", "C"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][4]", "1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][5]", "10"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][6]", "100"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][7]", "2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][8]", "20"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][9]", "200"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][10]", "3"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][11]", "30"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][12]", "300"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][13]", "4"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][14]", "40"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][15]", "400"); + + assertXPath(pXmlDoc, "/indexing/object[3]", "name", "WeirdTable"); + assertXPath(pXmlDoc, "/indexing/object[3]", "type", "table"); + assertXPath(pXmlDoc, "/indexing/object[3]", "index", "72"); + // Search paragraph with parent = 72 + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][1]", "A1"); + assertXPathContent(pXmlDoc, 
"/indexing/paragraph[@parent=72][2]", "B1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][3]", "C1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][4]", "D1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][5]", "A2B2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][6]", "C2D2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][7]", "A3B3C3D3"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][8]", "A4-1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][9]", "A4-2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][10]", "B4-1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][11]", "C4-1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][12]", "D4-1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][13]", "D4-2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][14]", "B4-2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][15]", "C4-2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][16]", "A5B5C5"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][17]", "D5"); } void IndexingExportTest::testIndexingExport_Sections() @@ -236,18 +248,20 @@ void IndexingExportTest::testIndexingExport_Sections() assertXPath(pXmlDoc, "/indexing"); - assertXPath(pXmlDoc, "/indexing/section[1]", "name", "Section1"); - assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[1]", - "This is a paragraph in a Section1"); - assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[2]", "Section1 - Paragraph 2"); - assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[3]", "Section1 - Paragraph 3"); + assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Section1"); + assertXPath(pXmlDoc, "/indexing/object[1]", "type", "section"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph in a Section1"); + assertXPathContent(pXmlDoc, 
"/indexing/paragraph[2]", "Section1 - Paragraph 2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "Section1 - Paragraph 3"); + + assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a paragraph outside sections"); - assertXPath(pXmlDoc, "/indexing/section[2]", "name", "Section2"); - assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[1]", "Section2 - Paragraph 1"); - assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[2]", "Section2 - Paragraph 2"); + assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Section2"); + assertXPath(pXmlDoc, "/indexing/object[2]", "type", "section"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "Section2 - Paragraph 1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Section2 - Paragraph 2"); - assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph outside sections"); - assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a paragraph outside sections"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "This is a paragraph outside sections"); } void IndexingExportTest::testIndexingExport_Fontwork() @@ -265,10 +279,11 @@ void IndexingExportTest::testIndexingExport_Fontwork() assertXPath(pXmlDoc, "/indexing"); - assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Gray"); + assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Gray"); + assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape"); - assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "Fontwork Text 1"); - assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "Fontwork Text 2"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Fontwork Text 1"); + assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Fontwork Text 2"); } void IndexingExportTest::testIndexingExport_Header_Footer() diff --git a/sw/source/filter/indexing/IndexingExport.cxx b/sw/source/filter/indexing/IndexingExport.cxx index 834111d88af7..946c4aaddc06 100644 --- 
a/sw/source/filter/indexing/IndexingExport.cxx +++ b/sw/source/filter/indexing/IndexingExport.cxx @@ -69,18 +69,20 @@ public: void handleOLENode(const SwOLENode* pOleNode) { auto pFrameFormat = pOleNode->GetFlyFormat(); - m_rXmlWriter.startElement("ole"); + m_rXmlWriter.startElement("object"); m_rXmlWriter.attribute("alt", pOleNode->GetTitle()); m_rXmlWriter.attribute("name", pFrameFormat->GetName()); + m_rXmlWriter.attribute("type", "ole"); m_rXmlWriter.endElement(); } void handleGraphicNode(const SwGrfNode* pGraphicNode) { auto pFrameFormat = pGraphicNode->GetFlyFormat(); - m_rXmlWriter.startElement("graphic"); + m_rXmlWriter.startElement("object"); m_rXmlWriter.attribute("alt", pGraphicNode->GetTitle()); m_rXmlWriter.attribute("name", pFrameFormat->GetName()); + m_rXmlWriter.attribute("type", "graphic"); m_rXmlWriter.endElement(); } @@ -93,6 +95,8 @@ public: } const OUString& rString = pTextNode->GetText().replaceAll(OUStringChar(CH_TXTATR_BREAKWORD), ""); + if (rString.isEmpty()) + return; m_rXmlWriter.startElement("paragraph"); m_rXmlWriter.attribute("index", pTextNode->GetIndex()); m_rXmlWriter.attribute("type", "1"); @@ -106,11 +110,15 @@ public: { if (pObject->GetName().isEmpty()) return; - m_rXmlWriter.startElement("shape"); + + m_rXmlWriter.startElement("object"); m_rXmlWriter.attribute("name", pObject->GetName()); m_rXmlWriter.attribute("alt", pObject->GetTitle()); + m_rXmlWriter.attribute("type", "shape"); m_rXmlWriter.attribute("description", pObject->GetDescription()); + m_rXmlWriter.endElement(); + SdrTextObj* pTextObject = dynamic_cast<SdrTextObj*>(pObject); if (pTextObject) { @@ -123,12 +131,11 @@ public: m_rXmlWriter.startElement("paragraph"); m_rXmlWriter.attribute("index", nParagraph); m_rXmlWriter.attribute("type", "2"); + m_rXmlWriter.attribute("parent", pObject->GetName()); m_rXmlWriter.content(sText); m_rXmlWriter.endElement(); } } - - m_rXmlWriter.endElement(); } void handleTableNode(SwTableNode* pTableNode) @@ -136,20 +143,22 @@ public: const 
SwTableFormat* pFormat = pTableNode->GetTable().GetFrameFormat(); OUString sName = pFormat->GetName(); - m_rXmlWriter.startElement("table"); + m_rXmlWriter.startElement("object"); m_rXmlWriter.attribute("index", pTableNode->GetIndex()); - m_rXmlWriter.attribute("type", "1"); m_rXmlWriter.attribute("name", sName); + m_rXmlWriter.attribute("type", "table"); + m_rXmlWriter.endElement(); maNodeStack.push_back(pTableNode); } void handleSectionNode(SwSectionNode* pSectionNode) { - m_rXmlWriter.startElement("section"); + m_rXmlWriter.startElement("object"); m_rXmlWriter.attribute("index", pSectionNode->GetIndex()); - m_rXmlWriter.attribute("type", "1"); m_rXmlWriter.attribute("name", pSectionNode->GetSection().GetSectionName()); + m_rXmlWriter.attribute("type", "section"); + m_rXmlWriter.endElement(); maNodeStack.push_back(pSectionNode); } @@ -159,7 +168,6 @@ public: if (!maNodeStack.empty() && pEndNode->StartOfSectionNode() == maNodeStack.back()) { maNodeStack.pop_back(); - m_rXmlWriter.endElement(); } } }; -- cgit v1.2.3