summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tomaž Vajngerl <tomaz.vajngerl@collabora.co.uk> 2021-08-22 11:43:10 +0900
committer: Tomaž Vajngerl <quikee@gmail.com> 2021-08-23 02:51:23 +0200
commit: 50f0e8c7880122a05585a2233f6f35d0dfee0385 (patch)
tree: 3d0850e249a5b9e2e38cf91da2a924ec8b9631dd
parent: 0224d859b8a497779611cc15943c017ec42f9b90 (diff)
indexing: make indexing XML flat and use simple element names
This changes the indexing XML to be flatter and changes the element names to be either "paragraph" or "object", where an "object" has a "type" attribute that states exactly what kind of object it is. This makes it easier to convert the XML into a format accepted by an indexing engine.

Change-Id: Ia8941cc9616a862c1bc980efea5ba2548217644e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/120836
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r-- sw/qa/extras/indexing/IndexingExportTest.cxx 167
-rw-r--r-- sw/source/filter/indexing/IndexingExport.cxx 28
2 files changed, 109 insertions, 86 deletions
diff --git a/sw/qa/extras/indexing/IndexingExportTest.cxx b/sw/qa/extras/indexing/IndexingExportTest.cxx
index 346ff783d2c4..9d40d887f30d 100644
--- a/sw/qa/extras/indexing/IndexingExportTest.cxx
+++ b/sw/qa/extras/indexing/IndexingExportTest.cxx
@@ -107,10 +107,12 @@ void IndexingExportTest::testIndexingExport_Images()
CPPUNIT_ASSERT(pXmlDoc);
assertXPath(pXmlDoc, "/indexing");
- assertXPath(pXmlDoc, "/indexing/graphic[1]", "alt", "Image_NonCaption - Alternative text");
- assertXPath(pXmlDoc, "/indexing/graphic[1]", "name", "Image_NonCaption");
- assertXPath(pXmlDoc, "/indexing/graphic[2]", "alt", "Image_InCaption - Alternative text");
- assertXPath(pXmlDoc, "/indexing/graphic[2]", "name", "Image_InCaption");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Image_NonCaption - Alternative text");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Image_NonCaption");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "type", "graphic");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "alt", "Image_InCaption - Alternative text");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Image_InCaption");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "type", "graphic");
}
void IndexingExportTest::testIndexingExport_OLE()
@@ -127,8 +129,9 @@ void IndexingExportTest::testIndexingExport_OLE()
CPPUNIT_ASSERT(pXmlDoc);
assertXPath(pXmlDoc, "/indexing");
- assertXPath(pXmlDoc, "/indexing/ole[1]", "name", "Object - Chart");
- assertXPath(pXmlDoc, "/indexing/ole[1]", "alt", "Alt Text");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Object - Chart");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Alt Text");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "type", "ole");
}
void IndexingExportTest::testIndexingExport_Shapes()
@@ -145,18 +148,22 @@ void IndexingExportTest::testIndexingExport_Shapes()
CPPUNIT_ASSERT(pXmlDoc);
assertXPath(pXmlDoc, "/indexing");
-
- assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Circle");
- assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "This is a circle");
- assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "This is a second paragraph");
-
- assertXPath(pXmlDoc, "/indexing/shape[2]", "name", "Diamond");
- assertXPathContent(pXmlDoc, "/indexing/shape[2]/paragraph[1]", "This is a diamond");
-
- assertXPath(pXmlDoc, "/indexing/shape[3]", "name", "Text Frame 1");
- assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[1]", "This is a TextBox - Para1");
- assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[2]", "Para2");
- assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[3]", "Para3");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Drawing : Just a Diamond");
+
+ assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Circle");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a circle");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "This is a second paragraph");
+
+ assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Diamond");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "type", "shape");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a diamond");
+
+ assertXPath(pXmlDoc, "/indexing/object[3]", "name", "Text Frame 1");
+ assertXPath(pXmlDoc, "/indexing/object[3]", "type", "shape");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "This is a TextBox - Para1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Para2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "Para3");
}
void IndexingExportTest::testIndexingExport_Tables()
@@ -174,51 +181,56 @@ void IndexingExportTest::testIndexingExport_Tables()
assertXPath(pXmlDoc, "/indexing");
- assertXPath(pXmlDoc, "/indexing/table[1]", "name", "Table1");
- assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[1]", "A");
- assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[2]", "B");
- assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[3]", "1");
- assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[4]", "2");
-
- assertXPath(pXmlDoc, "/indexing/table[2]", "name", "Table2");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[1]", "A");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[2]", "B");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[3]", "C");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[4]", "1");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[5]", "10");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[6]", "100");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[7]", "2");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[8]", "20");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[9]", "200");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[10]", "3");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[11]", "30");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[12]", "300");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[13]", "4");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[14]", "40");
- assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[15]", "400");
-
- assertXPath(pXmlDoc, "/indexing/table[3]", "name", "WeirdTable");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[1]", "A1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[2]", "B1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[3]", "C1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[4]", "D1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[5]", "A2B2");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[6]", "C2D2");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[7]", "A3B3C3D3");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[8]", "A4-1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[9]", "A4-2");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[10]", "B4-1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[11]", "C4-1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[12]", "D4-1");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[13]", "D4-2");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[14]", "");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[15]", "");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[16]", "B4-2");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[17]", "C4-2");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[18]", "");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[19]", "");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[20]", "A5B5C5");
- assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[21]", "D5");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Table1");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "type", "table");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "index", "9");
+ // Search paragraph with parent = 9
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][1]", "A");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][2]", "B");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][3]", "1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][4]", "2");
+
+ assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Table2");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "type", "table");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "index", "24");
+ // Search paragraph with parent = 24
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][1]", "A");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][2]", "B");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][3]", "C");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][4]", "1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][5]", "10");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][6]", "100");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][7]", "2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][8]", "20");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][9]", "200");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][10]", "3");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][11]", "30");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][12]", "300");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][13]", "4");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][14]", "40");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][15]", "400");
+
+ assertXPath(pXmlDoc, "/indexing/object[3]", "name", "WeirdTable");
+ assertXPath(pXmlDoc, "/indexing/object[3]", "type", "table");
+ assertXPath(pXmlDoc, "/indexing/object[3]", "index", "72");
+ // Search paragraph with parent = 72
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][1]", "A1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][2]", "B1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][3]", "C1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][4]", "D1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][5]", "A2B2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][6]", "C2D2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][7]", "A3B3C3D3");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][8]", "A4-1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][9]", "A4-2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][10]", "B4-1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][11]", "C4-1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][12]", "D4-1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][13]", "D4-2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][14]", "B4-2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][15]", "C4-2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][16]", "A5B5C5");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][17]", "D5");
}
void IndexingExportTest::testIndexingExport_Sections()
@@ -236,18 +248,20 @@ void IndexingExportTest::testIndexingExport_Sections()
assertXPath(pXmlDoc, "/indexing");
- assertXPath(pXmlDoc, "/indexing/section[1]", "name", "Section1");
- assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[1]",
- "This is a paragraph in a Section1");
- assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[2]", "Section1 - Paragraph 2");
- assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[3]", "Section1 - Paragraph 3");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Section1");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "type", "section");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph in a Section1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Section1 - Paragraph 2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "Section1 - Paragraph 3");
+
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a paragraph outside sections");
- assertXPath(pXmlDoc, "/indexing/section[2]", "name", "Section2");
- assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[1]", "Section2 - Paragraph 1");
- assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[2]", "Section2 - Paragraph 2");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Section2");
+ assertXPath(pXmlDoc, "/indexing/object[2]", "type", "section");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "Section2 - Paragraph 1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Section2 - Paragraph 2");
- assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph outside sections");
- assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a paragraph outside sections");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "This is a paragraph outside sections");
}
void IndexingExportTest::testIndexingExport_Fontwork()
@@ -265,10 +279,11 @@ void IndexingExportTest::testIndexingExport_Fontwork()
assertXPath(pXmlDoc, "/indexing");
- assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Gray");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Gray");
+ assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape");
- assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "Fontwork Text 1");
- assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "Fontwork Text 2");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Fontwork Text 1");
+ assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Fontwork Text 2");
}
void IndexingExportTest::testIndexingExport_Header_Footer()
diff --git a/sw/source/filter/indexing/IndexingExport.cxx b/sw/source/filter/indexing/IndexingExport.cxx
index 834111d88af7..946c4aaddc06 100644
--- a/sw/source/filter/indexing/IndexingExport.cxx
+++ b/sw/source/filter/indexing/IndexingExport.cxx
@@ -69,18 +69,20 @@ public:
void handleOLENode(const SwOLENode* pOleNode)
{
auto pFrameFormat = pOleNode->GetFlyFormat();
- m_rXmlWriter.startElement("ole");
+ m_rXmlWriter.startElement("object");
m_rXmlWriter.attribute("alt", pOleNode->GetTitle());
m_rXmlWriter.attribute("name", pFrameFormat->GetName());
+ m_rXmlWriter.attribute("type", "ole");
m_rXmlWriter.endElement();
}
void handleGraphicNode(const SwGrfNode* pGraphicNode)
{
auto pFrameFormat = pGraphicNode->GetFlyFormat();
- m_rXmlWriter.startElement("graphic");
+ m_rXmlWriter.startElement("object");
m_rXmlWriter.attribute("alt", pGraphicNode->GetTitle());
m_rXmlWriter.attribute("name", pFrameFormat->GetName());
+ m_rXmlWriter.attribute("type", "graphic");
m_rXmlWriter.endElement();
}
@@ -93,6 +95,8 @@ public:
}
const OUString& rString
= pTextNode->GetText().replaceAll(OUStringChar(CH_TXTATR_BREAKWORD), "");
+ if (rString.isEmpty())
+ return;
m_rXmlWriter.startElement("paragraph");
m_rXmlWriter.attribute("index", pTextNode->GetIndex());
m_rXmlWriter.attribute("type", "1");
@@ -106,11 +110,15 @@ public:
{
if (pObject->GetName().isEmpty())
return;
- m_rXmlWriter.startElement("shape");
+
+ m_rXmlWriter.startElement("object");
m_rXmlWriter.attribute("name", pObject->GetName());
m_rXmlWriter.attribute("alt", pObject->GetTitle());
+ m_rXmlWriter.attribute("type", "shape");
m_rXmlWriter.attribute("description", pObject->GetDescription());
+ m_rXmlWriter.endElement();
+
SdrTextObj* pTextObject = dynamic_cast<SdrTextObj*>(pObject);
if (pTextObject)
{
@@ -123,12 +131,11 @@ public:
m_rXmlWriter.startElement("paragraph");
m_rXmlWriter.attribute("index", nParagraph);
m_rXmlWriter.attribute("type", "2");
+ m_rXmlWriter.attribute("parent", pObject->GetName());
m_rXmlWriter.content(sText);
m_rXmlWriter.endElement();
}
}
-
- m_rXmlWriter.endElement();
}
void handleTableNode(SwTableNode* pTableNode)
@@ -136,20 +143,22 @@ public:
const SwTableFormat* pFormat = pTableNode->GetTable().GetFrameFormat();
OUString sName = pFormat->GetName();
- m_rXmlWriter.startElement("table");
+ m_rXmlWriter.startElement("object");
m_rXmlWriter.attribute("index", pTableNode->GetIndex());
- m_rXmlWriter.attribute("type", "1");
m_rXmlWriter.attribute("name", sName);
+ m_rXmlWriter.attribute("type", "table");
+ m_rXmlWriter.endElement();
maNodeStack.push_back(pTableNode);
}
void handleSectionNode(SwSectionNode* pSectionNode)
{
- m_rXmlWriter.startElement("section");
+ m_rXmlWriter.startElement("object");
m_rXmlWriter.attribute("index", pSectionNode->GetIndex());
- m_rXmlWriter.attribute("type", "1");
m_rXmlWriter.attribute("name", pSectionNode->GetSection().GetSectionName());
+ m_rXmlWriter.attribute("type", "section");
+ m_rXmlWriter.endElement();
maNodeStack.push_back(pSectionNode);
}
@@ -159,7 +168,6 @@ public:
if (!maNodeStack.empty() && pEndNode->StartOfSectionNode() == maNodeStack.back())
{
maNodeStack.pop_back();
- m_rXmlWriter.endElement();
}
}
};