summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuboš Luňák <l.lunak@collabora.com>2020-07-28 10:37:16 +0200
committerLuboš Luňák <l.lunak@collabora.com>2020-07-29 11:45:31 +0200
commit7a38f1817b0568cfbcda9a91dc86eafaba336871 (patch)
treee2cda33ed6a5c81389a09b42cb1f47c4b8ded864
parentdf56a000a165da01a14b2fdf2ad66f371a452ce8 (diff)
optimize a bit more conversions to/from Skia bitmap formats
It turns out this doesn't really matter in practice, since if converting between pixel formats is where time is spent, something higher must be already wrong. But since I've already written this... Change-Id: I25451664d529a9226d2d81b2c424a4f4e5422ad5 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/99577 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
-rw-r--r--external/skia/Library_skia.mk5
-rw-r--r--external/skia/UnpackedTarball_skia.mk1
-rw-r--r--external/skia/extend-rgb-to-rgba.patch.029
-rw-r--r--external/skia/inc/skia_opts.hxx28
-rw-r--r--external/skia/source/skia_opts.cxx75
-rw-r--r--external/skia/source/skia_opts_internal.hxx83
-rw-r--r--external/skia/source/skia_opts_ssse3.cxx17
-rw-r--r--solenv/clang-format/excludelist4
-rw-r--r--vcl/skia/SkiaHelper.cxx2
-rw-r--r--vcl/skia/salbmp.cxx85
10 files changed, 269 insertions, 60 deletions
diff --git a/external/skia/Library_skia.mk b/external/skia/Library_skia.mk
index 1cd4adac9f79..5fea049c901b 100644
--- a/external/skia/Library_skia.mk
+++ b/external/skia/Library_skia.mk
@@ -91,6 +91,11 @@ $(eval $(call gb_Library_set_include,skia,\
$(eval $(call gb_Library_add_exception_objects,skia,\
external/skia/source/SkMemory_malloc \
external/skia/source/skia_compiler \
+ external/skia/source/skia_opts \
+))
+
+$(eval $(call gb_Library_add_exception_objects,skia,\
+ external/skia/source/skia_opts_ssse3, $(CXXFLAGS_INTRINSICS_SSSE3) $(CLANG_CXXFLAGS_INTRINSICS_SSSE3) \
))
$(eval $(call gb_Library_set_generated_cxx_suffix,skia,cpp))
diff --git a/external/skia/UnpackedTarball_skia.mk b/external/skia/UnpackedTarball_skia.mk
index 8ea1fa992a99..23e864e3f206 100644
--- a/external/skia/UnpackedTarball_skia.mk
+++ b/external/skia/UnpackedTarball_skia.mk
@@ -34,7 +34,6 @@ skia_patches := \
windows-force-unicode-api.patch.0 \
operator-eq-bool.patch.1 \
fix-without-gl.patch.0 \
- extend-rgb-to-rgba.patch.0 \
windows-typeface-directwrite.patch.0 \
windows-raster-surface-no-copies.patch.1 \
fix-windows-dwrite.patch.1 \
diff --git a/external/skia/extend-rgb-to-rgba.patch.0 b/external/skia/extend-rgb-to-rgba.patch.0
deleted file mode 100644
index e0ce55fa2377..000000000000
--- a/external/skia/extend-rgb-to-rgba.patch.0
+++ /dev/null
@@ -1,29 +0,0 @@
-diff --git a/include/core/SkSwizzle.h b/include/core/SkSwizzle.h
-index 61e93b2da7..9a26f0f492 100644
---- ./include/core/SkSwizzle.h
-+++ ./include/core/SkSwizzle.h
-@@ -16,4 +16,8 @@
- */
- SK_API void SkSwapRB(uint32_t* dest, const uint32_t* src, int count);
-
-+SK_API void SkExtendRGBToRGBA(uint32_t* dest, const uint8_t* src, int count);
-+
-+SK_API void SkExtendGrayToRGBA(uint32_t* dest, const uint8_t* src, int count);
-+
- #endif
-diff --git a/src/core/SkSwizzle.cpp b/src/core/SkSwizzle.cpp
-index 301b0184f1..382323695f 100644
---- ./src/core/SkSwizzle.cpp
-+++ ./src/core/SkSwizzle.cpp
-@@ -12,3 +12,11 @@
- void SkSwapRB(uint32_t* dest, const uint32_t* src, int count) {
- SkOpts::RGBA_to_BGRA(dest, src, count);
- }
-+
-+void SkExtendRGBToRGBA(uint32_t* dest, const uint8_t* src, int count) {
-+ SkOpts::RGB_to_RGB1(dest, src, count);
-+}
-+
-+void SkExtendGrayToRGBA(uint32_t* dest, const uint8_t* src, int count) {
-+ SkOpts::gray_to_RGB1(dest, src, count);
-+}
diff --git a/external/skia/inc/skia_opts.hxx b/external/skia/inc/skia_opts.hxx
new file mode 100644
index 000000000000..e292f0920fe8
--- /dev/null
+++ b/external/skia/inc/skia_opts.hxx
@@ -0,0 +1,28 @@
+/*
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKIA_OPTS_H
+#define SKIA_OPTS_H
+
+#include <include/core/SkTypes.h>
+
+SK_API void SkConvertRGBToRGBA(uint32_t* dest, const uint8_t* src, int count);
+
+SK_API void SkConvertGrayToRGBA(uint32_t* dest, const uint8_t* src, int count);
+
+SK_API void SkConvertRGBAToRGB(uint8_t* dest, const uint32_t* src, int count);
+
+SK_API void SkConvertRGBAToGrayFast(uint8_t* dest, const uint32_t* src, int count);
+
+namespace SkLoOpts
+{
+SK_API void Init();
+
+typedef void (*Swizzle_u8_8888)(uint8_t*, const uint32_t*, int);
+extern Swizzle_u8_8888 RGB1_to_RGB, // i.e. remove an (opaque) alpha
+ RGB1_to_gray_fast; // i.e. copy one channel to the result
+}
+
+#endif
diff --git a/external/skia/source/skia_opts.cxx b/external/skia/source/skia_opts.cxx
new file mode 100644
index 000000000000..061e599e71ab
--- /dev/null
+++ b/external/skia/source/skia_opts.cxx
@@ -0,0 +1,75 @@
+/*
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <skia_opts.hxx>
+
+#if defined __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#endif
+#include "src/core/SkCpu.h"
+#include "src/core/SkOpts.h"
+#if defined __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+void SkConvertRGBToRGBA(uint32_t* dest, const uint8_t* src, int count) // Exported wrapper around Skia's internal SkOpts::RGB_to_RGB1.
+{
+ SkOpts::RGB_to_RGB1(dest, src, count); // Arguments are forwarded unchanged; presumably expands 3-byte pixels to 4 bytes with an opaque alpha.
+}
+
+void SkConvertGrayToRGBA(uint32_t* dest, const uint8_t* src, int count) // Exported wrapper around Skia's internal SkOpts::gray_to_RGB1.
+{
+ SkOpts::gray_to_RGB1(dest, src, count); // Arguments are forwarded unchanged; presumably expands 1-byte gray pixels to 4-byte opaque pixels.
+}
+
+void SkConvertRGBAToRGB(uint8_t* dest, const uint32_t* src, int count) // Exported entry point for dropping the alpha byte of each 32-bit pixel.
+{
+ SkLoOpts::RGB1_to_RGB(dest, src, count); // Calls through the SkLoOpts function pointer, so the implementation picked at init time is used.
+}
+
+void SkConvertRGBAToGrayFast(uint8_t* dest, const uint32_t* src, int count) // Exported entry point for taking one channel of each 32-bit pixel as the gray value.
+{
+ SkLoOpts::RGB1_to_gray_fast(dest, src, count); // Calls through the SkLoOpts function pointer, so the implementation picked at init time is used.
+}
+
+// The rest is mostly based on Skia's SkOpts.cpp, reduced to only SSSE3 so far.
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+ #define SK_OPTS_NS ssse3
+#else
+ #define SK_OPTS_NS portable
+#endif
+
+#include "skia_opts_internal.hxx"
+
+namespace SkLoOpts {
+ // Define default function pointer values here...
+ // Each pointer starts out at the SK_OPTS_NS implementation selected above: the ssse3
+ // one if this file is already compiled for SSSE3 or better, the portable one otherwise.
+ // In the portable case init() below may still replace them with the SSSE3 versions.
+#define DEFINE_DEFAULT(name) decltype(name) name = SK_OPTS_NS::name
+ DEFINE_DEFAULT(RGB1_to_RGB);
+ DEFINE_DEFAULT(RGB1_to_gray_fast);
+#undef DEFINE_DEFAULT
+
+ // Each Init_foo() is defined in its own file (compiled with the matching CPU flags).
+ void Init_ssse3();
+
+ static void init() { // Upgrades the function pointers if the running CPU supports more than the compile-time baseline.
+#if !defined(SK_BUILD_NO_OPTS)
+ #if defined(SK_CPU_X86)
+ #if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSSE3
+ if (SkCpu::Supports(SkCpu::SSSE3)) { Init_ssse3(); } // Only needed when the defaults above are the portable ones.
+ #endif
+ #endif
+#endif
+ }
+
+ void Init() { // Public entry point; this change calls it from isVCLSkiaEnabled() right after SkGraphics::Init().
+ static SkOnce once; // NOTE(review): presumably SkOnce makes init() run at most once - confirm against Skia's SkOnce.h.
+ once(init);
+ }
+} // namespace SkLoOpts
diff --git a/external/skia/source/skia_opts_internal.hxx b/external/skia/source/skia_opts_internal.hxx
new file mode 100644
index 000000000000..3f9d79c22352
--- /dev/null
+++ b/external/skia/source/skia_opts_internal.hxx
@@ -0,0 +1,83 @@
+/*
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKIA_OPTS_INTERNAL_H
+#define SKIA_OPTS_INTERNAL_H
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+ #include <immintrin.h>
+#endif
+
+namespace SK_OPTS_NS {
+
+static void RGB1_to_RGB_portable(uint8_t dst[], const uint32_t* src, int count) { // Scalar version: writes 3 bytes per 32-bit input pixel.
+ for (int i = 0; i < count; i++) {
+ dst[0] = src[i] >> 0; // bits 0-7 of the pixel
+ dst[1] = src[i] >> 8; // bits 8-15
+ dst[2] = src[i] >> 16; // bits 16-23; bits 24-31 are not copied
+ dst += 3; // output is tightly packed, 3 bytes per pixel
+ }
+}
+static void RGB1_to_gray_fast_portable(uint8_t dst[], const uint32_t* src, int count) { // Scalar version: writes 1 byte per 32-bit input pixel.
+ for (int i = 0; i < count; i++) {
+ dst[i] = src[i] & 0xFF; // lowest 8 bits of the pixel are used as the gray value, no weighting of channels
+ }
+}
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+// SSSE3 version: converts `count` 32-bit pixels at `src` into tightly packed
+// 3-byte pixels at `dst` by dropping the fourth byte (the opaque alpha) of
+// every pixel. `dst` must have room for 3 * count bytes.
+inline void RGB1_to_RGB(uint8_t dst[], const uint32_t* src, int count) {
+    const uint8_t X = 0xFF; // Used as a placeholder. The value of X is irrelevant.
+    __m128i pack = _mm_setr_epi8(0,1,2, 4,5,6, 8,9,10, 12,13,14, X,X,X,X);
+
+    // Storing 4 pixels should store 12 bytes, but here it stores 16, so test count >= 6
+    // in order to not overrun the output buffer. The 4 surplus bytes are overwritten
+    // by the next iteration or by the portable tail below.
+    while (count >= 6) {
+        __m128i rgba = _mm_loadu_si128((const __m128i*) src);
+
+        __m128i rgb = _mm_shuffle_epi8(rgba, pack);
+
+        // Store 4 pixels.
+        _mm_storeu_si128((__m128i*) dst, rgb);
+
+        // src points to uint32_t, i.e. one element per pixel, so 4 pixels are 4 elements
+        // (not 4*4, which would skip input pixels and read past the end of the buffer).
+        src += 4;
+        dst += 4*3;
+        count -= 4;
+    }
+    // Convert the remaining pixels (fewer than 6) one by one.
+    RGB1_to_RGB_portable(dst, src, count);
+}
+
+// SSSE3 version: writes one byte per 32-bit pixel of `src` to `dst`, copying the
+// pixel's first byte (a single colour channel is used as the gray value, there is
+// no weighted conversion). `dst` must have room for `count` bytes.
+inline void RGB1_to_gray_fast(uint8_t dst[], const uint32_t* src, int count) {
+    const uint8_t X = 0xFF; // Used as a placeholder. The value of X is irrelevant.
+    __m128i pack = _mm_setr_epi8(0,4,8,12, X,X,X,X,X,X,X,X,X,X,X,X);
+
+    // Storing 4 pixels should store 4 bytes, but here it stores 16, so test count >= 16
+    // in order to not overrun the output buffer. The 12 surplus bytes are overwritten
+    // by the following iterations or by the portable tail below.
+    while (count >= 16) {
+        __m128i rgba = _mm_loadu_si128((const __m128i*) src);
+
+        __m128i rgb = _mm_shuffle_epi8(rgba, pack);
+
+        // Store 4 pixels.
+        _mm_storeu_si128((__m128i*) dst, rgb);
+
+        // src points to uint32_t, i.e. one element per pixel, so 4 pixels are 4 elements
+        // (not 4*4, which would skip input pixels and read past the end of the buffer).
+        src += 4;
+        dst += 4;
+        count -= 4;
+    }
+    // Convert the remaining pixels (fewer than 16) one by one.
+    RGB1_to_gray_fast_portable(dst, src, count);
+}
+
+#else
+inline void RGB1_to_RGB(uint8_t dst[], const uint32_t* src, int count) { // Build without SSSE3: same name as the SIMD variant, so callers of SK_OPTS_NS::RGB1_to_RGB need no #if.
+ RGB1_to_RGB_portable(dst, src, count); // plain scalar conversion
+}
+inline void RGB1_to_gray_fast(uint8_t dst[], const uint32_t* src, int count) { // Build without SSSE3: same name as the SIMD variant, so callers of SK_OPTS_NS::RGB1_to_gray_fast need no #if.
+ RGB1_to_gray_fast_portable(dst, src, count); // plain scalar conversion
+}
+#endif
+
+} // namespace
+
+#endif
diff --git a/external/skia/source/skia_opts_ssse3.cxx b/external/skia/source/skia_opts_ssse3.cxx
new file mode 100644
index 000000000000..8d19b6eeabaf
--- /dev/null
+++ b/external/skia/source/skia_opts_ssse3.cxx
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include <skia_opts.hxx>
+#define SK_OPTS_NS ssse3
+#include "skia_opts_internal.hxx"
+
+namespace SkLoOpts {
+ void Init_ssse3() { // Called by SkLoOpts::init() in skia_opts.cxx once SkCpu reports SSSE3 support.
+ RGB1_to_RGB = ssse3::RGB1_to_RGB; // repoint the SkLoOpts function pointers at the versions from this file's ssse3 namespace
+ RGB1_to_gray_fast = ssse3::RGB1_to_gray_fast;
+ }
+}
diff --git a/solenv/clang-format/excludelist b/solenv/clang-format/excludelist
index 11b8b617a491..edb9818f4d48 100644
--- a/solenv/clang-format/excludelist
+++ b/solenv/clang-format/excludelist
@@ -4383,6 +4383,10 @@ external/redland/raptor/raptor_config.h
external/redland/rasqal/rasqal.h
external/redland/redland/librdf.h
external/sane/inc/sane/sane.h
+external/skia/inc/skia_opts.hxx
+external/skia/source/skia_opts.cxx
+external/skia/source/skia_opts_internal.hxx
+external/skia/source/skia_opts_ssse3.cxx
external/unixODBC/inc/odbc/sql.h
external/unixODBC/inc/odbc/sqlext.h
external/unixODBC/inc/odbc/sqltypes.h
diff --git a/vcl/skia/SkiaHelper.cxx b/vcl/skia/SkiaHelper.cxx
index b7d45ad61196..a3df4315b668 100644
--- a/vcl/skia/SkiaHelper.cxx
+++ b/vcl/skia/SkiaHelper.cxx
@@ -38,6 +38,7 @@ bool isVCLSkiaEnabled() { return false; }
#include <SkSurface.h>
#include <SkGraphics.h>
#include <skia_compiler.hxx>
+#include <skia_opts.hxx>
#ifdef DBG_UTIL
#include <fstream>
@@ -239,6 +240,7 @@ bool isVCLSkiaEnabled()
{
bRet = true;
SkGraphics::Init();
+ SkLoOpts::Init();
// don't actually block if denylisted, but log it if enabled, and also get the vendor id
checkDeviceDenylisted(true);
}
diff --git a/vcl/skia/salbmp.cxx b/vcl/skia/salbmp.cxx
index dda47b46fa25..ed66eddbc3c5 100644
--- a/vcl/skia/salbmp.cxx
+++ b/vcl/skia/salbmp.cxx
@@ -37,6 +37,7 @@
#include <SkSwizzle.h>
#include <SkColorFilter.h>
#include <SkColorMatrix.h>
+#include <skia_opts.hxx>
#include <skia/utils.hxx>
#include <skia/zone.hxx>
@@ -450,7 +451,7 @@ SkBitmap SkiaSalBitmap::GetAsSkBitmap() const
#endif
if (!bitmap.installPixels(
SkImageInfo::MakeS32(mPixelsSize.Width(), mPixelsSize.Height(), alphaType),
- data.release(), mPixelsSize.Width() * 4,
+ data.release(), mScanlineSize,
[](void* addr, void*) { delete[] static_cast<sal_uInt8*>(addr); }, nullptr))
abort();
bitmap.setImmutable();
@@ -461,13 +462,18 @@ SkBitmap SkiaSalBitmap::GetAsSkBitmap() const
std::unique_ptr<uint32_t[]> data(
new uint32_t[mPixelsSize.Height() * mPixelsSize.Width()]);
uint32_t* dest = data.get();
- for (long y = 0; y < mPixelsSize.Height(); ++y)
+ // SkConvertRGBToRGBA() also works as BGR to BGRA (the function extends 3 bytes to 4
+ // by adding 0xFF alpha, so position of B and R doesn't matter).
+ if (mPixelsSize.Width() * 3 == mScanlineSize)
+ SkConvertRGBToRGBA(dest, mBuffer.get(), mPixelsSize.Height() * mPixelsSize.Width());
+ else
{
- const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
- // This also works as BGR to BGRA (the function extends 3 bytes to 4
- // by adding 0xFF alpha, so position of B and R doesn't matter).
- SkExtendRGBToRGBA(dest, src, mPixelsSize.Width());
- dest += mPixelsSize.Width();
+ for (long y = 0; y < mPixelsSize.Height(); ++y)
+ {
+ const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
+ SkConvertRGBToRGBA(dest, src, mPixelsSize.Width());
+ dest += mPixelsSize.Width();
+ }
}
if (!bitmap.installPixels(
SkImageInfo::MakeS32(mPixelsSize.Width(), mPixelsSize.Height(),
@@ -486,11 +492,17 @@ SkBitmap SkiaSalBitmap::GetAsSkBitmap() const
std::unique_ptr<uint32_t[]> data(
new uint32_t[mPixelsSize.Height() * mPixelsSize.Width()]);
uint32_t* dest = data.get();
- for (long y = 0; y < mPixelsSize.Height(); ++y)
+ if (mPixelsSize.Width() * 1 == mScanlineSize)
+ SkConvertGrayToRGBA(dest, mBuffer.get(),
+ mPixelsSize.Height() * mPixelsSize.Width());
+ else
{
- const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
- SkExtendGrayToRGBA(dest, src, mPixelsSize.Width());
- dest += mPixelsSize.Width();
+ for (long y = 0; y < mPixelsSize.Height(); ++y)
+ {
+ const sal_uInt8* src = mBuffer.get() + mScanlineSize * y;
+ SkConvertGrayToRGBA(dest, src, mPixelsSize.Width());
+ dest += mPixelsSize.Width();
+ }
}
if (!bitmap.installPixels(
SkImageInfo::MakeS32(mPixelsSize.Width(), mPixelsSize.Height(),
@@ -826,37 +838,50 @@ void SkiaSalBitmap::EnsureBitmapData()
assert(mBuffer != nullptr);
if (mBitCount == 32)
{
- for (long y = 0; y < mSize.Height(); ++y)
+ if (int(bitmap.rowBytes()) == mScanlineSize)
+ memcpy(mBuffer.get(), bitmap.getPixels(), mSize.Height() * mScanlineSize);
+ else
{
- const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
- sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
- memcpy(dest, src, mScanlineSize);
+ for (long y = 0; y < mSize.Height(); ++y)
+ {
+ const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
+ sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
+ memcpy(dest, src, mScanlineSize);
+ }
}
}
else if (mBitCount == 24) // non-paletted
{
- for (long y = 0; y < mSize.Height(); ++y)
+ if (int(bitmap.rowBytes()) == mSize.Width() * 4 && mSize.Width() * 3 == mScanlineSize)
{
- const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
- sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
- for (long x = 0; x < mSize.Width(); ++x)
+ SkConvertRGBAToRGB(mBuffer.get(), bitmap.getAddr32(0, 0),
+ mSize.Height() * mSize.Width());
+ }
+ else
+ {
+ for (long y = 0; y < mSize.Height(); ++y)
{
- *dest++ = *src++;
- *dest++ = *src++;
- *dest++ = *src++;
- ++src; // skip alpha
+ const uint32_t* src = bitmap.getAddr32(0, y);
+ sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
+ SkConvertRGBAToRGB(dest, src, mSize.Width());
}
}
}
else if (mBitCount == 8 && mPalette.IsGreyPalette8Bit())
- {
- for (long y = 0; y < mSize.Height(); ++y)
+ { // no actual data conversion, use one color channel as the gray value
+ if (int(bitmap.rowBytes()) == mSize.Width() * 4 && mSize.Width() * 1 == mScanlineSize)
{
- const uint8_t* src = static_cast<uint8_t*>(bitmap.getAddr(0, y));
- sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
- // no actual data conversion, use one color channel as the gray value
- for (long x = 0; x < mSize.Width(); ++x)
- dest[x] = src[x * 4];
+ SkConvertRGBAToGrayFast(mBuffer.get(), bitmap.getAddr32(0, 0),
+ mSize.Height() * mSize.Width());
+ }
+ else
+ {
+ for (long y = 0; y < mSize.Height(); ++y)
+ {
+ const uint32_t* src = bitmap.getAddr32(0, y);
+ sal_uInt8* dest = mBuffer.get() + mScanlineSize * y;
+ SkConvertRGBAToGrayFast(dest, src, mSize.Width());
+ }
}
}
else