summary refs log tree commit diff
diff options
context:
space:
mode:
author Fridrich Štrba <fridrich.strba@bluewin.ch> 2017-01-31 17:12:11 +0100
committer Fridrich Štrba <fridrich.strba@bluewin.ch> 2017-01-31 17:12:11 +0100
commit 94f36d00499808d7588a0970ce0dc7470d1245c7 (patch)
tree e76dfd16845d53ed1a7bfa7e4eef1732a714dc4b
parent dc7a4e0f70800fd747eaa7fef3e606d6bb9e5177 (diff)
Refactoring to get the text encoding from the font information
Change-Id: I2261310cf4ef2635e44cc80db809e7ca9fc4409f
-rw-r--r-- src/lib/VSDContentCollector.cpp 504
-rw-r--r-- src/lib/VSDContentCollector.h 2
-rw-r--r-- src/lib/VSDTypes.h 5
-rw-r--r-- src/lib/libvisio_utils.cpp 1
4 files changed, 374 insertions, 138 deletions
diff --git a/src/lib/VSDContentCollector.cpp b/src/lib/VSDContentCollector.cpp
index eb9a366..e201809 100644
--- a/src/lib/VSDContentCollector.cpp
+++ b/src/lib/VSDContentCollector.cpp
@@ -256,7 +256,14 @@ void libvisio::VSDContentCollector::_flushShape()
if (m_currentForeignData.size() && m_currentForeignProps["librevenge:mime-type"] && m_foreignWidth != 0.0 && m_foreignHeight != 0.0)
numForeignElements++;
if (!m_currentText.empty())
- numTextElements++;
+ {
+ if ((m_currentText.m_format == VSD_TEXT_UTF16
+ && (m_currentText.m_data.size() >= 2 && (m_currentText.m_data.getDataBuffer()[0] || m_currentText.m_data.getDataBuffer()[1])))
+ || m_currentText.m_data.getDataBuffer()[0])
+ {
+ numTextElements++;
+ }
+ }
if (numPathElements+numForeignElements+numTextElements > 1)
{
@@ -591,6 +598,19 @@ void libvisio::VSDContentCollector::_flushText()
/* Do not output empty text objects. */
if (m_currentText.empty() || m_misc.m_hideText)
return;
+ else
+ // Check whether the buffer contains only the terminating NULL character
+ {
+ if (m_currentText.m_format == VSD_TEXT_UTF16)
+ {
+ if (m_currentText.m_data.size() < 2)
+ return;
+ else if (!(m_currentText.m_data.getDataBuffer()[0]) && !(m_currentText.m_data.getDataBuffer()[1]))
+ return;
+ }
+ else if (!(m_currentText.m_data.getDataBuffer()[0]))
+ return;
+ }
/* Fill the text object/frame properties */
double xmiddle = m_txtxform ? m_txtxform->width / 2.0 : m_xform.width / 2.0;
@@ -679,172 +699,381 @@ void libvisio::VSDContentCollector::_flushText()
unsigned charNumRemaining(charIt->charCount);
unsigned tabNumRemaining(tabIt->m_numChars);
+ std::vector<unsigned char> sOutputVector;
librevenge::RVNGString sOutputText;
- /* Iterate over the text character by character */
- librevenge::RVNGString::Iter textIt(m_currentText);
- for (textIt.rewind(); textIt.next();)
+ // Unfortunately, we have to handle the Unicode formats differently than the 8-bit formats
+ if (m_currentText.m_format == VSD_TEXT_UTF8 || m_currentText.m_format == VSD_TEXT_UTF16)
{
- /* Any character will cause a paragraph to open if it is not yet opened. */
- if (!isParagraphOpened)
+ std::vector<unsigned char> tmpBuffer(m_currentText.m_data.size());
+ memcpy(&tmpBuffer[0], m_currentText.m_data.getDataBuffer(), m_currentText.m_data.size());
+ librevenge::RVNGString textString;
+ appendCharacters(textString, tmpBuffer, m_currentText.m_format);
+ /* Iterate over the text character by character */
+ librevenge::RVNGString::Iter textIt(textString);
+ for (textIt.rewind(); textIt.next();)
{
- librevenge::RVNGPropertyList paraProps;
- _fillParagraphProperties(paraProps, *paraIt);
+ /* Any character will cause a paragraph to open if it is not yet opened. */
+ if (!isParagraphOpened)
+ {
+ librevenge::RVNGPropertyList paraProps;
+ _fillParagraphProperties(paraProps, *paraIt);
+
+ if (m_textBlockStyle.defaultTabStop > 0.0)
+ paraProps.insert("style:tab-stop-distance", m_textBlockStyle.defaultTabStop);
+
+ _fillTabSet(paraProps, *tabIt);
- if (m_textBlockStyle.defaultTabStop > 0.0)
- paraProps.insert("style:tab-stop-distance", m_textBlockStyle.defaultTabStop);
+ VSDBullet bullet;
+ _bulletFromParaFormat(bullet, *paraIt);
- _fillTabSet(paraProps, *tabIt);
+ /* Bullet definition changed with regard to the last paragraph style. */
+ if (bullet != currentBullet)
+ {
+ /* If the previous paragraph style had a bullet, close the list level. */
+ if (!!currentBullet)
+ m_shapeOutputText->addCloseUnorderedListLevel();
- VSDBullet bullet;
- _bulletFromParaFormat(bullet, *paraIt);
+ currentBullet = bullet;
+ /* If the current paragraph style has a bullet, open a new list level. */
+ if (!!currentBullet)
+ {
+ librevenge::RVNGPropertyList bulletList;
+ _listLevelFromBullet(bulletList, currentBullet);
+ m_shapeOutputText->addOpenUnorderedListLevel(bulletList);
+ }
+ }
- /* Bullet definition changed with regard to the last paragraph style. */
- if (bullet != currentBullet)
+ if (!currentBullet)
+ m_shapeOutputText->addOpenParagraph(paraProps);
+ else
+ m_shapeOutputText->addOpenListElement(paraProps);
+ isParagraphOpened = true;
+ isParagraphWithoutSpan = true;
+ }
+
+ /* Any character will cause a span to open if it is not yet opened.
+ * The additional conditions aim to avoid superfluous empty span but
+ * also a paragraph without span at all. */
+ if (!isSpanOpened && ((*(textIt()) != '\n') || isParagraphWithoutSpan))
{
- /* If the previous paragraph style had a bullet, close the list level. */
- if (!!currentBullet)
- m_shapeOutputText->addCloseUnorderedListLevel();
+ librevenge::RVNGPropertyList textProps;
+ _fillCharProperties(textProps, *charIt);
- currentBullet = bullet;
- /* If the current paragraph style has a bullet, open a new list level. */
- if (!!currentBullet)
+ // TODO: In draw, text span background cannot be specified the same way as in writer span
+ if (m_textBlockStyle.isTextBkgndFilled)
{
- librevenge::RVNGPropertyList bulletList;
- _listLevelFromBullet(bulletList, currentBullet);
- m_shapeOutputText->addOpenUnorderedListLevel(bulletList);
+ textProps.insert("fo:background-color", getColourString(m_textBlockStyle.textBkgndColour));
+#if 0
+ if (m_textBlockStyle.textBkgndColour.a)
+ textProps.insert("fo:background-opacity", 1.0 - m_textBlockStyle.textBkgndColour.a/255.0, librevenge::RVNG_PERCENT);
+#endif
}
+ m_shapeOutputText->addOpenSpan(textProps);
+ isSpanOpened = true;
+ isParagraphWithoutSpan = false;
}
- if (!currentBullet)
- m_shapeOutputText->addOpenParagraph(paraProps);
- else
- m_shapeOutputText->addOpenListElement(paraProps);
- isParagraphOpened = true;
- isParagraphWithoutSpan = true;
- }
-
- /* Any character will cause a span to open if it is not yet opened.
- * The additional conditions aim to avoid superfluous empty span but
- * also a paragraph without span at all. */
- if (!isSpanOpened && ((*(textIt()) != '\n') || isParagraphWithoutSpan))
- {
- librevenge::RVNGPropertyList textProps;
- _fillCharProperties(textProps, *charIt);
+ /* Current character is a paragraph break,
+ * which will cause the paragraph to close. */
+ if (*(textIt()) == '\n')
+ {
+ if (!sOutputText.empty())
+ m_shapeOutputText->addInsertText(sOutputText);
+ sOutputText.clear();
+ if (isSpanOpened)
+ {
+ m_shapeOutputText->addCloseSpan();
+ isSpanOpened = false;
+ }
- // TODO: In draw, text span background cannot be specified the same way as in writer span
- if (m_textBlockStyle.isTextBkgndFilled)
+ if (isParagraphOpened)
+ {
+ if (!currentBullet)
+ m_shapeOutputText->addCloseParagraph();
+ else
+ m_shapeOutputText->addCloseListElement();
+ isParagraphOpened = false;
+ }
+ }
+ /* Current character is a tabulator. We have to output
+ * the current text buffer and insert the tab. */
+ else if (*(textIt()) == '\t')
{
- textProps.insert("fo:background-color", getColourString(m_textBlockStyle.textBkgndColour));
-#if 0
- if (m_textBlockStyle.textBkgndColour.a)
- textProps.insert("fo:background-opacity", 1.0 - m_textBlockStyle.textBkgndColour.a/255.0, librevenge::RVNG_PERCENT);
-#endif
+ if (!sOutputText.empty())
+ m_shapeOutputText->addInsertText(sOutputText);
+ sOutputText.clear();
+ m_shapeOutputText->addInsertTab();
+ }
+ /* Current character is a field placeholder. We append
+ * to the current text buffer a text representation
+ * of the field. */
+ else if (strlen(textIt()) == 3 &&
+ textIt()[0] == '\xef' &&
+ textIt()[1] == '\xbf' &&
+ textIt()[2] == '\xbc')
+ _appendField(sOutputText);
+ /* We have a normal UTF8 character and we append it
+ * to the current text buffer. */
+ else
+ sOutputText.append(textIt());
+
+ /* Decrease the count of remaining characters in the same paragraph,
+ * if it is possible. */
+ if (paraNumRemaining)
+ paraNumRemaining--;
+ /* Fetch next paragraph style if it exists. If not, just use the
+ * last one. */
+ if (!paraNumRemaining)
+ {
+ ++paraIt;
+ if (paraIt != m_paraFormats.end())
+ paraNumRemaining = paraIt->charCount;
+ else
+ --paraIt;
}
- m_shapeOutputText->addOpenSpan(textProps);
- isSpanOpened = true;
- isParagraphWithoutSpan = false;
- }
- /* Current character is a paragraph break,
- * which will cause the paragraph to close. */
- if (*(textIt()) == '\n')
- {
- if (!sOutputText.empty())
- m_shapeOutputText->addInsertText(sOutputText);
- sOutputText.clear();
- if (isSpanOpened)
+ /* Decrease the count of remaining characters in the same span,
+ * if it is possible. */
+ if (charNumRemaining)
+ charNumRemaining--;
+ /* Fetch next character style if it exists and close span, since
+ * the next span will have to use the new character style.
+ * If there is no more character style to fetch, just finish using
+ * the last one. */
+ if (!charNumRemaining)
{
- m_shapeOutputText->addCloseSpan();
- isSpanOpened = false;
+ ++charIt;
+ if (charIt != m_charFormats.end())
+ {
+ charNumRemaining = charIt->charCount;
+ if (isSpanOpened)
+ {
+ if (!sOutputText.empty())
+ m_shapeOutputText->addInsertText(sOutputText);
+ sOutputText.clear();
+ m_shapeOutputText->addCloseSpan();
+ isSpanOpened = false;
+ }
+ }
+ else
+ --charIt;
}
- if (isParagraphOpened)
+ /* Decrease the count of remaining characters using the same
+ * tab-set definition, if it is possible. */
+ if (tabNumRemaining)
+ tabNumRemaining--;
+ /* Fetch next tab-set definition if it exists. If not, just use the
+ * last one. */
+ if (!tabNumRemaining)
{
- if (!currentBullet)
- m_shapeOutputText->addCloseParagraph();
+ ++tabIt;
+ if (tabIt != m_tabSets.end())
+ tabNumRemaining = tabIt->m_numChars;
else
- m_shapeOutputText->addCloseListElement();
- isParagraphOpened = false;
+ --tabIt;
}
}
- /* Current character is a tabulator. We have to output
- * the current text buffer and insert the tab. */
- else if (*(textIt()) == '\t')
+ }
+ else // 8-bit charsets
+ {
+ /* Iterate over the text character by character */
+ const unsigned char *tmpBuffer = m_currentText.m_data.getDataBuffer();
+ unsigned long tmpBufferLength = m_currentText.m_data.size();
+ // Remove the terminating \0 character from the buffer
+ while (tmpBufferLength > 1 &&!tmpBuffer[tmpBufferLength-1])
{
- if (!sOutputText.empty())
- m_shapeOutputText->addInsertText(sOutputText);
- sOutputText.clear();
- m_shapeOutputText->addInsertTab();
- }
- /* Current character is a field placeholder. We append
- * to the current text buffer a text representation
- * of the field. */
- else if (strlen(textIt()) == 3 &&
- textIt()[0] == '\xef' &&
- textIt()[1] == '\xbf' &&
- textIt()[2] == '\xbc')
- _appendField(sOutputText);
- /* We have a normal UTF8 character and we append it
- * to the current text buffer. */
- else
- sOutputText.append(textIt());
-
- /* Decrease the count of remaining characters in the same paragraph,
- * if it is possible. */
- if (paraNumRemaining)
- paraNumRemaining--;
- /* Fetch next paragraph style if it exists. If not, just use the
- * last one. */
- if (!paraNumRemaining)
- {
- ++paraIt;
- if (paraIt != m_paraFormats.end())
- paraNumRemaining = paraIt->charCount;
- else
- --paraIt;
+ --tmpBufferLength;
}
-
- /* Decrease the count of remaining characters in the same span,
- * if it is possible. */
- if (charNumRemaining)
- charNumRemaining--;
- /* Fetch next character style if it exists and close span, since
- * the next span will have to use the new character style.
- * If there is no more character style to fetch, just finish using
- * the last one. */
- if (!charNumRemaining)
+ for (unsigned long i = 0; i < tmpBufferLength; ++i)
{
- ++charIt;
- if (charIt != m_charFormats.end())
+ /* Any character will cause a paragraph to open if it is not yet opened. */
+ if (!isParagraphOpened)
{
- charNumRemaining = charIt->charCount;
- if (isSpanOpened)
+ librevenge::RVNGPropertyList paraProps;
+ _fillParagraphProperties(paraProps, *paraIt);
+
+ if (m_textBlockStyle.defaultTabStop > 0.0)
+ paraProps.insert("style:tab-stop-distance", m_textBlockStyle.defaultTabStop);
+
+ _fillTabSet(paraProps, *tabIt);
+
+ VSDBullet bullet;
+ _bulletFromParaFormat(bullet, *paraIt);
+
+ /* Bullet definition changed with regard to the last paragraph style. */
+ if (bullet != currentBullet)
{
- if (!sOutputText.empty())
- m_shapeOutputText->addInsertText(sOutputText);
+ /* If the previous paragraph style had a bullet, close the list level. */
+ if (!!currentBullet)
+ m_shapeOutputText->addCloseUnorderedListLevel();
+
+ currentBullet = bullet;
+ /* If the current paragraph style has a bullet, open a new list level. */
+ if (!!currentBullet)
+ {
+ librevenge::RVNGPropertyList bulletList;
+ _listLevelFromBullet(bulletList, currentBullet);
+ m_shapeOutputText->addOpenUnorderedListLevel(bulletList);
+ }
+ }
+
+ if (!currentBullet)
+ m_shapeOutputText->addOpenParagraph(paraProps);
+ else
+ m_shapeOutputText->addOpenListElement(paraProps);
+ isParagraphOpened = true;
+ isParagraphWithoutSpan = true;
+ }
+
+ /* Any character will cause a span to open if it is not yet opened.
+ * The additional conditions aim to avoid superfluous empty span but
+ * also a paragraph without span at all. */
+ if (!isSpanOpened && ((tmpBuffer[i] != (unsigned char)'\n' && tmpBuffer[i] != 0x0d && tmpBuffer[i] != 0x0e) || isParagraphWithoutSpan))
+ {
+ librevenge::RVNGPropertyList textProps;
+ _fillCharProperties(textProps, *charIt);
+
+ // TODO: In draw, text span background cannot be specified the same way as in writer span
+ if (m_textBlockStyle.isTextBkgndFilled)
+ {
+ textProps.insert("fo:background-color", getColourString(m_textBlockStyle.textBkgndColour));
+#if 0
+ if (m_textBlockStyle.textBkgndColour.a)
+ textProps.insert("fo:background-opacity", 1.0 - m_textBlockStyle.textBkgndColour.a/255.0, librevenge::RVNG_PERCENT);
+#endif
+ }
+ m_shapeOutputText->addOpenSpan(textProps);
+ isSpanOpened = true;
+ isParagraphWithoutSpan = false;
+ }
+
+ /* Current character is a paragraph break,
+ * which will cause the paragraph to close. */
+ if (tmpBuffer[i] == (unsigned char)'\n' || tmpBuffer[i] == 0x0d || tmpBuffer[i] == 0x0e)
+ {
+ if (!sOutputVector.empty())
+ {
+ appendCharacters(sOutputText, sOutputVector, charIt->font.m_format);
+ sOutputVector.clear();
+ }
+ if (!sOutputText.empty())
+ {
+ m_shapeOutputText->addInsertText(sOutputText);
sOutputText.clear();
+ }
+ if (isSpanOpened)
+ {
m_shapeOutputText->addCloseSpan();
isSpanOpened = false;
}
+
+ if (isParagraphOpened)
+ {
+ if (!currentBullet)
+ m_shapeOutputText->addCloseParagraph();
+ else
+ m_shapeOutputText->addCloseListElement();
+ isParagraphOpened = false;
+ }
}
+ /* Current character is a tabulator. We have to output
+ * the current text buffer and insert the tab. */
+ else if (tmpBuffer[i] == (unsigned char)'\t')
+ {
+ if (!sOutputVector.empty())
+ {
+ appendCharacters(sOutputText, sOutputVector, charIt->font.m_format);
+ sOutputVector.clear();
+ }
+ if (!sOutputText.empty())
+ {
+ m_shapeOutputText->addInsertText(sOutputText);
+ sOutputText.clear();
+ }
+ m_shapeOutputText->addInsertTab();
+ }
+ /* Current character is a field placeholder. We append
+ * to the current text buffer a text representation
+ * of the field. */
+ else if (tmpBuffer[i] == 0x1e)
+ {
+ if (!sOutputVector.empty())
+ {
+ appendCharacters(sOutputText, sOutputVector, charIt->font.m_format);
+ sOutputVector.clear();
+ }
+ _appendField(sOutputText);
+ }
+ /* We have a normal 8-bit character and we append its byte to the
+ * pending byte buffer, which is converted to text when flushed. */
else
- --charIt;
- }
+ sOutputVector.push_back(tmpBuffer[i]);
+
+ /* Decrease the count of remaining characters in the same paragraph,
+ * if it is possible. */
+ if (paraNumRemaining)
+ paraNumRemaining--;
+ /* Fetch next paragraph style if it exists. If not, just use the
+ * last one. */
+ if (!paraNumRemaining)
+ {
+ ++paraIt;
+ if (paraIt != m_paraFormats.end())
+ paraNumRemaining = paraIt->charCount;
+ else
+ --paraIt;
+ }
- /* Decrease the count of remaining characters using the same
- * tab-set definition, if it is possible. */
- if (tabNumRemaining)
- tabNumRemaining--;
- /* Fetch next tab-set definition if it exists. If not, just use the
- * last one. */
- if (!tabNumRemaining)
- {
- ++tabIt;
- if (tabIt != m_tabSets.end())
- tabNumRemaining = tabIt->m_numChars;
- else
- --tabIt;
+ /* Decrease the count of remaining characters in the same span,
+ * if it is possible. */
+ if (charNumRemaining)
+ charNumRemaining--;
+ /* Fetch next character style if it exists and close span, since
+ * the next span will have to use the new character style.
+ * If there is no more character style to fetch, just finish using
+ * the last one. */
+ if (!charNumRemaining)
+ {
+ ++charIt;
+ if (charIt != m_charFormats.end())
+ {
+ charNumRemaining = charIt->charCount;
+ if (isSpanOpened)
+ {
+ if (!sOutputVector.empty())
+ {
+ appendCharacters(sOutputText, sOutputVector, charIt->font.m_format);
+ sOutputVector.clear();
+ }
+ if (!sOutputText.empty())
+ {
+ m_shapeOutputText->addInsertText(sOutputText);
+ sOutputText.clear();
+ }
+ m_shapeOutputText->addCloseSpan();
+ isSpanOpened = false;
+ }
+ }
+ else
+ --charIt;
+ }
+
+ /* Decrease the count of remaining characters using the same
+ * tab-set definition, if it is possible. */
+ if (tabNumRemaining)
+ tabNumRemaining--;
+ /* Fetch next tab-set definition if it exists. If not, just use the
+ * last one. */
+ if (!tabNumRemaining)
+ {
+ ++tabIt;
+ if (tabIt != m_tabSets.end())
+ tabNumRemaining = tabIt->m_numChars;
+ else
+ --tabIt;
+ }
}
}
@@ -853,9 +1082,16 @@ void libvisio::VSDContentCollector::_flushText()
{
if (isSpanOpened)
{
+ if (!sOutputVector.empty())
+ {
+ appendCharacters(sOutputText, sOutputVector, charIt->font.m_format);
+ sOutputVector.clear();
+ }
if (!sOutputText.empty())
+ {
m_shapeOutputText->addInsertText(sOutputText);
- sOutputText.clear();
+ sOutputText.clear();
+ }
m_shapeOutputText->addCloseSpan();
isSpanOpened = false;
}
@@ -2434,11 +2670,7 @@ void libvisio::VSDContentCollector::collectText(unsigned level, const librevenge
m_currentText.clear();
if (!textStream.empty())
- {
- std::vector<unsigned char> tmpBuffer(textStream.size());
- memcpy(&tmpBuffer[0], textStream.getDataBuffer(), textStream.size());
- appendCharacters(m_currentText, tmpBuffer, format);
- }
+ m_currentText = libvisio::VSDName(textStream, format);
}
void libvisio::VSDContentCollector::collectParaIX(unsigned /* id */ , unsigned level, unsigned charCount, const boost::optional<double> &indFirst,
diff --git a/src/lib/VSDContentCollector.h b/src/lib/VSDContentCollector.h
index 4460e35..2338e7a 100644
--- a/src/lib/VSDContentCollector.h
+++ b/src/lib/VSDContentCollector.h
@@ -278,7 +278,7 @@ private:
std::map<unsigned, NURBSData> m_NURBSData;
std::map<unsigned, PolylineData> m_polylineData;
- librevenge::RVNGString m_currentText;
+ libvisio::VSDName m_currentText;
std::map<unsigned, librevenge::RVNGString> m_names, m_stencilNames;
std::vector<librevenge::RVNGString> m_fields;
VSDFieldList m_stencilFields;
diff --git a/src/lib/VSDTypes.h b/src/lib/VSDTypes.h
index ae0abd9..a2e6e40 100644
--- a/src/lib/VSDTypes.h
+++ b/src/lib/VSDTypes.h
@@ -188,6 +188,11 @@ public:
{
return !m_data.size();
}
+ void clear()
+ {
+ m_data.clear();
+ m_format = VSD_TEXT_ANSI;
+ }
librevenge::RVNGBinaryData m_data;
TextFormat m_format;
};
diff --git a/src/lib/libvisio_utils.cpp b/src/lib/libvisio_utils.cpp
index bd03e75..08b5b2d 100644
--- a/src/lib/libvisio_utils.cpp
+++ b/src/lib/libvisio_utils.cpp
@@ -112,7 +112,6 @@ const librevenge::RVNGString libvisio::getColourString(const Colour &c)
void libvisio::appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character)
{
// Convert carriage returns to new line characters
- // Writerperfect/LibreOffice will replace them by <text:line-break>
if (ucs4Character == (UChar32) 0x0d || ucs4Character == (UChar32) 0x0e)
ucs4Character = (UChar32) '\n';