diff options
-rw-r--r-- | vcl/inc/sallayout.hxx | 5 | ||||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/data/tdf115117-1.odt | bin | 0 -> 8566 bytes | |||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/data/tdf115117-2.odt | bin | 0 -> 8629 bytes | |||
-rw-r--r-- | vcl/qa/cppunit/pdfexport/pdfexport.cxx | 217 | ||||
-rw-r--r-- | vcl/source/gdi/CommonSalLayout.cxx | 45 | ||||
-rw-r--r-- | vcl/source/gdi/pdfwriter_impl.cxx | 48 |
6 files changed, 277 insertions, 38 deletions
diff --git a/vcl/inc/sallayout.hxx b/vcl/inc/sallayout.hxx index ff008c44dd55..c4a29dc9d0c9 100644 --- a/vcl/inc/sallayout.hxx +++ b/vcl/inc/sallayout.hxx @@ -255,6 +255,7 @@ struct GlyphItem { int mnFlags; int mnCharPos; // index in string + int mnCharCount; // number of characters making up this glyph int mnOrigWidth; // original glyph width int mnNewWidth; // width after adjustments @@ -270,6 +271,7 @@ public: long nFlags, int nOrigWidth ) : mnFlags(nFlags) , mnCharPos(nCharPos) + , mnCharCount(1) , mnOrigWidth(nOrigWidth) , mnNewWidth(nOrigWidth) , mnXOffset(0) @@ -278,10 +280,11 @@ public: , mnFallbackLevel(0) { } - GlyphItem( int nCharPos, sal_GlyphId aGlyphId, const Point& rLinearPos, + GlyphItem(int nCharPos, int nCharCount, sal_GlyphId aGlyphId, const Point& rLinearPos, long nFlags, int nOrigWidth, int nXOffset ) : mnFlags(nFlags) , mnCharPos(nCharPos) + , mnCharCount(nCharCount) , mnOrigWidth(nOrigWidth) , mnNewWidth(nOrigWidth) , mnXOffset(nXOffset) diff --git a/vcl/qa/cppunit/pdfexport/data/tdf115117-1.odt b/vcl/qa/cppunit/pdfexport/data/tdf115117-1.odt Binary files differnew file mode 100644 index 000000000000..63fe82946ef1 --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf115117-1.odt diff --git a/vcl/qa/cppunit/pdfexport/data/tdf115117-2.odt b/vcl/qa/cppunit/pdfexport/data/tdf115117-2.odt Binary files differnew file mode 100644 index 000000000000..c1e1f6d4392c --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf115117-2.odt diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index a904a5dc638d..ba8df2f1a616 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -8,6 +8,7 @@ */ #include <config_features.h> +#include <config_test.h> #include <com/sun/star/frame/Desktop.hpp> #include <com/sun/star/frame/XStorable.hpp> @@ -25,6 +26,7 @@ #include <tools/zcodec.hxx> #if HAVE_FEATURE_PDFIUM #include <fpdf_edit.h> +#include <fpdf_text.h> #include <fpdfview.h> #endif @@ 
-67,6 +69,16 @@ public: void testTdf99680(); void testTdf99680_2(); void testTdf108963(); +#if !TEST_FONTS_MISSING + /// Test writing ToUnicode CMAP for LTR ligatures. + void testTdf115117_1(); + /// Test extracting LTR text with ligatures. + void testTdf115117_1a(); + /// Test writing ToUnicode CMAP for RTL ligatures. + void testTdf115117_2(); + /// Test extracting RTL text with ligatures. + void testTdf115117_2a(); +#endif #endif CPPUNIT_TEST_SUITE(PdfExportTest); @@ -85,6 +97,12 @@ public: CPPUNIT_TEST(testTdf99680); CPPUNIT_TEST(testTdf99680_2); CPPUNIT_TEST(testTdf108963); +#if !TEST_FONTS_MISSING + CPPUNIT_TEST(testTdf115117_1); + CPPUNIT_TEST(testTdf115117_1a); + CPPUNIT_TEST(testTdf115117_2); + CPPUNIT_TEST(testTdf115117_2a); +#endif #endif CPPUNIT_TEST_SUITE_END(); }; @@ -760,6 +778,205 @@ void PdfExportTest::testTdf108963() CPPUNIT_ASSERT_EQUAL(1, nYellowPathCount); } + +#if !TEST_FONTS_MISSING +// This requires Carlito font, if it is missing the test will most likely +// fail. 
+void PdfExportTest::testTdf115117_1() +{ + vcl::filter::PDFDocument aDocument; + load("tdf115117-1.odt", aDocument); + + vcl::filter::PDFObjectElement* pToUnicode = nullptr; + + // Get access to ToUnicode of the first font + for (const auto& aElement : aDocument.GetElements()) + { + auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElement.get()); + if (!pObject) + continue; + auto pType = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("Type")); + if (pType && pType->GetValue() == "Font") + { + auto pToUnicodeRef = dynamic_cast<vcl::filter::PDFReferenceElement*>(pObject->Lookup("ToUnicode")); + CPPUNIT_ASSERT(pToUnicodeRef); + pToUnicode = pToUnicodeRef->LookupObject(); + break; + } + } + + CPPUNIT_ASSERT(pToUnicode); + auto pStream = pToUnicode->GetStream(); + CPPUNIT_ASSERT(pStream); + SvMemoryStream aObjectStream; + ZCodec aZCodec; + aZCodec.BeginCompression(); + pStream->GetMemory().Seek(0); + aZCodec.Decompress(pStream->GetMemory(), aObjectStream); + CPPUNIT_ASSERT(aZCodec.EndCompression()); + aObjectStream.Seek(0); + // The first values, <01> <02> etc., are glyph ids, they might change order + // if we changed how font subsets are created. + // The second values, <00740069> etc., are Unicode code points in hex, + // <00740069> is U+0074 and U+0069 i.e. "ti" which is a ligature in + // Carlito/Calibri. This test fails if any of the second values + // change, which means we are not detecting ligatures and writing CMAP + // entries for them correctly. If glyph order in the subset changes then + // the order here will change and the PDF has to be carefully inspected to + // ensure that the new values are correct before updating the string below. 
+ OString aCmap("9 beginbfchar\n" + "<01> <00740069>\n" + "<02> <0020>\n" + "<03> <0074>\n" + "<04> <0065>\n" + "<05> <0073>\n" + "<06> <00660069>\n" + "<07> <0066006C>\n" + "<08> <006600660069>\n" + "<09> <00660066006C>\n" + "endbfchar"); + auto pStart = static_cast<const char*>(aObjectStream.GetData()); + const char* pEnd = pStart + aObjectStream.GetSize(); + auto it = std::search(pStart, pEnd, aCmap.getStr(), aCmap.getStr() + aCmap.getLength()); + CPPUNIT_ASSERT(it != pEnd); +} + +// This requires DejaVu Sans font, if it is missing the test will most likely +// fail. +void PdfExportTest::testTdf115117_2() +{ + // See the comments in testTdf115117_1() for explanation. + + vcl::filter::PDFDocument aDocument; + load("tdf115117-2.odt", aDocument); + + vcl::filter::PDFObjectElement* pToUnicode = nullptr; + + for (const auto& aElement : aDocument.GetElements()) + { + auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElement.get()); + if (!pObject) + continue; + auto pType = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("Type")); + if (pType && pType->GetValue() == "Font") + { + auto pToUnicodeRef = dynamic_cast<vcl::filter::PDFReferenceElement*>(pObject->Lookup("ToUnicode")); + CPPUNIT_ASSERT(pToUnicodeRef); + pToUnicode = pToUnicodeRef->LookupObject(); + break; + } + } + + CPPUNIT_ASSERT(pToUnicode); + auto pStream = pToUnicode->GetStream(); + CPPUNIT_ASSERT(pStream); + SvMemoryStream aObjectStream; + ZCodec aZCodec; + aZCodec.BeginCompression(); + pStream->GetMemory().Seek(0); + aZCodec.Decompress(pStream->GetMemory(), aObjectStream); + CPPUNIT_ASSERT(aZCodec.EndCompression()); + aObjectStream.Seek(0); + OString aCmap("7 beginbfchar\n" + "<01> <06440627>\n" + "<02> <0020>\n" + "<03> <0641>\n" + "<04> <0642>\n" + "<05> <0648>\n" + "<06> <06440627>\n" + "<07> <0628>\n" + "endbfchar"); + auto pStart = static_cast<const char*>(aObjectStream.GetData()); + const char* pEnd = pStart + aObjectStream.GetSize(); + auto it = std::search(pStart, pEnd, 
aCmap.getStr(), aCmap.getStr() + aCmap.getLength()); + CPPUNIT_ASSERT(it != pEnd); +} + +void PdfExportTest::testTdf115117_1a() +{ + // Import the bugdoc and export as PDF. + OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf115117-1.odt"; + mxComponent = loadFromDesktop(aURL); + CPPUNIT_ASSERT(mxComponent.is()); + + uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY); + utl::TempFile aTempFile; + aTempFile.EnableKillingFile(); + utl::MediaDescriptor aMediaDescriptor; + aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); + xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList()); + + // Parse the export result with pdfium. + SvFileStream aFile(aTempFile.GetURL(), StreamMode::READ); + SvMemoryStream aMemory; + aMemory.WriteStream(aFile); + mpPdfDocument = FPDF_LoadMemDocument(aMemory.GetData(), aMemory.GetSize(), /*password=*/nullptr); + CPPUNIT_ASSERT(mpPdfDocument); + + // The document has one page. + CPPUNIT_ASSERT_EQUAL(1, FPDF_GetPageCount(mpPdfDocument)); + mpPdfPage = FPDF_LoadPage(mpPdfDocument, /*page_index=*/0); + CPPUNIT_ASSERT(mpPdfPage); + + auto pPdfTextPage = FPDFText_LoadPage(mpPdfPage); + CPPUNIT_ASSERT(pPdfTextPage); + + // Extract the text from the page. This pdfium API is a bit higher level + // than we want and might apply heuristics that give false positives, but it + // is a good approximation in addition to the check in testTdf115117_1(). + int nChars = FPDFText_CountChars(pPdfTextPage); + CPPUNIT_ASSERT_EQUAL(44, nChars); + + OUString aExpectedText = "ti ti test ti\r\nti test fi fl ffi ffl test fi"; + std::vector<sal_uInt32> aChars(nChars); + for (int i = 0; i < nChars; i++) + aChars[i] = FPDFText_GetUnicode(pPdfTextPage, i); + OUString aActualText(aChars.data(), aChars.size()); + CPPUNIT_ASSERT_EQUAL(aExpectedText, aActualText); +} + +void PdfExportTest::testTdf115117_2a() +{ + // See the comments in testTdf115117_1a() for explanation. 
+ + // Import the bugdoc and export as PDF. + OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf115117-2.odt"; + mxComponent = loadFromDesktop(aURL); + CPPUNIT_ASSERT(mxComponent.is()); + + uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY); + utl::TempFile aTempFile; + aTempFile.EnableKillingFile(); + utl::MediaDescriptor aMediaDescriptor; + aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); + xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList()); + + // Parse the export result with pdfium. + SvFileStream aFile(aTempFile.GetURL(), StreamMode::READ); + SvMemoryStream aMemory; + aMemory.WriteStream(aFile); + mpPdfDocument = FPDF_LoadMemDocument(aMemory.GetData(), aMemory.GetSize(), /*password=*/nullptr); + CPPUNIT_ASSERT(mpPdfDocument); + + // The document has one page. + CPPUNIT_ASSERT_EQUAL(1, FPDF_GetPageCount(mpPdfDocument)); + mpPdfPage = FPDF_LoadPage(mpPdfDocument, /*page_index=*/0); + CPPUNIT_ASSERT(mpPdfPage); + + auto pPdfTextPage = FPDFText_LoadPage(mpPdfPage); + CPPUNIT_ASSERT(pPdfTextPage); + + int nChars = FPDFText_CountChars(pPdfTextPage); + CPPUNIT_ASSERT_EQUAL(13, nChars); + + OUString aExpectedText = u"\u0627\u0644 \u0628\u0627\u0644 \u0648\u0642\u0641 \u0627\u0644"; + std::vector<sal_uInt32> aChars(nChars); + for (int i = 0; i < nChars; i++) + aChars[i] = FPDFText_GetUnicode(pPdfTextPage, i); + OUString aActualText(aChars.data(), aChars.size()); + CPPUNIT_ASSERT_EQUAL(aExpectedText, aActualText); +} +#endif #endif CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest); diff --git a/vcl/source/gdi/CommonSalLayout.cxx b/vcl/source/gdi/CommonSalLayout.cxx index 2f110b138a8a..cfd86ed27409 100644 --- a/vcl/source/gdi/CommonSalLayout.cxx +++ b/vcl/source/gdi/CommonSalLayout.cxx @@ -690,6 +690,49 @@ bool CommonSalLayout::LayoutText(ImplLayoutArgs& rArgs) for (int i = 0; i < nRunGlyphCount; ++i) { int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; int32_t nCharPos = 
pHbGlyphInfos[i].cluster; + int32_t nCharCount = 0; + + // Find the number of characters that make up this glyph. + if (!bRightToLeft) + { + // If the cluster is the same as the previous glyph, then this + // was already consumed, skip. + if (i > 0 && pHbGlyphInfos[i].cluster == pHbGlyphInfos[i - 1].cluster) + nCharCount = 0; + else + { + // Find the next glyph with a different cluster, or the + // end of text. + int j = i; + int32_t nNextCharPos = nCharPos; + while (nNextCharPos == nCharPos && j < nRunGlyphCount) + nNextCharPos = pHbGlyphInfos[j++].cluster; + + if (nNextCharPos == nCharPos) + nNextCharPos = rArgs.mnEndCharPos; + nCharCount = nNextCharPos - nCharPos; + } + } + else + { + // If the cluster is the same as the next glyph, then this + // will be consumed later, skip. + if (i < nRunGlyphCount - 1 && pHbGlyphInfos[i].cluster == pHbGlyphInfos[i + 1].cluster) + nCharCount = 0; + else + { + // Find the previous glyph with a different cluster, or + // the end of text. + int j = i; + int32_t nNextCharPos = nCharPos; + while (nNextCharPos == nCharPos && j >= 0) + nNextCharPos = pHbGlyphInfos[j--].cluster; + + if (nNextCharPos == nCharPos) + nNextCharPos = rArgs.mnEndCharPos; + nCharCount = nNextCharPos - nCharPos; + } + } // if needed request glyph fallback by updating LayoutArgs if (!nGlyphIndex) @@ -756,7 +799,7 @@ bool CommonSalLayout::LayoutText(ImplLayoutArgs& rArgs) nYOffset = std::lround(nYOffset * nYScale); Point aNewPos(aCurrPos.X() + nXOffset, aCurrPos.Y() + nYOffset); - const GlyphItem aGI(nCharPos, nGlyphIndex, aNewPos, nGlyphFlags, + const GlyphItem aGI(nCharPos, nCharCount, nGlyphIndex, aNewPos, nGlyphFlags, nAdvance, nXOffset); AppendGlyph(aGI); diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx index 03b1a1d9e12d..58711a9d862b 100644 --- a/vcl/source/gdi/pdfwriter_impl.cxx +++ b/vcl/source/gdi/pdfwriter_impl.cxx @@ -6598,7 +6598,6 @@ void PDFWriterImpl::drawLayout( SalLayout& rLayout, const OUString& rText, bool bool 
bVertical = m_aCurrentPDFState.m_aFont.IsVertical(); int nGlyphs; int nIndex = 0; - int nMaxCharPos = rText.getLength()-1; double fXScale = 1.0; double fSkew = 0.0; sal_Int32 nPixelFontHeight = m_pReferenceDevice->mpFontInstance->maFontSelData.mnHeight; @@ -6717,48 +6716,25 @@ void PDFWriterImpl::drawLayout( SalLayout& rLayout, const OUString& rText, bool FontMetric aRefDevFontMetric = m_pReferenceDevice->GetFontMetric(); // collect the glyphs into a single array - const int nTmpMaxGlyphs = rLayout.GetOrientation() ? 1 : nMaxGlyphs; // #i97991# temporary workaround for #i87686# std::vector< PDFGlyph > aGlyphs; - aGlyphs.reserve( nTmpMaxGlyphs ); + aGlyphs.reserve( nMaxGlyphs ); // first get all the glyphs and register them; coordinates still in Pixel Point aGNGlyphPos; - while ((nGlyphs = rLayout.GetNextGlyphs(nTmpMaxGlyphs, pGlyphs, aGNGlyphPos, nIndex, pFallbackFonts)) != 0) + while ((nGlyphs = rLayout.GetNextGlyphs(nMaxGlyphs, pGlyphs, aGNGlyphPos, nIndex, pFallbackFonts)) != 0) { aCodeUnits.clear(); + aCodeUnitsPerGlyph.clear(); for( int i = 0; i < nGlyphs; i++ ) { - // default case: 1 glyph is one unicode - aCodeUnitsPerGlyph.push_back(1); - if (pGlyphs[i]->mnCharPos >= 0 && pGlyphs[i]->mnCharPos <= nMaxCharPos) - { - int nChars = 1; - // try to handle ligatures and such - if( i < nGlyphs-1 ) - { - nChars = pGlyphs[i+1]->mnCharPos - pGlyphs[i]->mnCharPos; - int start = pGlyphs[i]->mnCharPos; - // #i115618# fix for simple RTL+CTL cases - // supports RTL ligatures. TODO: more complex CTL, etc. - if( nChars < 0 ) - { - nChars = -nChars; - start = pGlyphs[i+1]->mnCharPos + 1; - } - else if (nChars == 0) - nChars = 1; - aCodeUnitsPerGlyph.back() = nChars; - for( int n = 0; n < nChars; n++ ) - aCodeUnits.push_back( rText[ start + n ] ); - } - else - aCodeUnits.push_back(rText[pGlyphs[i]->mnCharPos]); - } - else - aCodeUnits.push_back( 0 ); - // note: in case of ctl one character may result - // in multiple glyphs. 
The current SalLayout - // implementations set -1 then to indicate that no direct - // mapping is possible + // try to handle ligatures and such + int nStart = pGlyphs[i]->mnCharPos; + int nChars = pGlyphs[i]->mnCharCount; + if (nChars < 0) + nChars = 0; + + aCodeUnitsPerGlyph.push_back(nChars); + for( int n = 0; n < nChars; n++ ) + aCodeUnits.push_back( rText[ nStart + n ] ); } registerGlyphs( nGlyphs, pGlyphs, pGlyphWidths, aCodeUnits.data(), aCodeUnitsPerGlyph.data(), pMappedGlyphs, pMappedFontObjects, pFallbackFonts ); |