From bd5c3582581f37513f45b518e348f443d5d57334 Mon Sep 17 00:00:00 2001 From: Colomban Wendling Date: Tue, 2 May 2023 20:50:52 +0200 Subject: a11y: Fix returning unpaired surrogates when retrieving characters Fix implementations of XAccessibleText's getTextAtIndex(), getTextBeforeIndex() and getTextBehindIndex() when called with AccessibleTextType::CHARACTER to return the whole code point rather than an unpaired surrogate. This is still not perfect because XAccessibleText::getCharacterCount() will return an incorrect value (code units rather than code points), but it fixes the most useful case of retrieving the character at e.g. the caret offset. This fixes the GTK3 and Windows backends as well without further changes. Qt6 also mostly works according to Michael Weghorn, but for a bug on Qt's side (https://bugreports.qt.io/browse/QTBUG-113438). MacOS backend doesn't seem to be affected in the first place. Change-Id: I53f07bcba78c6b267939257542a521b106101e96 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151303 Tested-by: Jenkins Reviewed-by: Michael Weghorn --- .../accessibility/testdocuments/unicode.fodf | 135 +++++++++++++++++++++ sw/qa/extras/accessibility/unicode.cxx | 102 ++++++++++++++++ 2 files changed, 237 insertions(+) create mode 100644 sw/qa/extras/accessibility/testdocuments/unicode.fodf create mode 100644 sw/qa/extras/accessibility/unicode.cxx (limited to 'sw/qa/extras') diff --git a/sw/qa/extras/accessibility/testdocuments/unicode.fodf b/sw/qa/extras/accessibility/testdocuments/unicode.fodf new file mode 100644 index 000000000000..9bdccebf03ec --- /dev/null +++ b/sw/qa/extras/accessibility/testdocuments/unicode.fodf @@ -0,0 +1,135 @@ + + + 2022-10-12T18:05:31.4089004852023-05-11T10:35:16.229411275PT9M45S3LibreOfficeDev/7.6.0.0.alpha0$Linux_X86_64 LibreOffice_project/44c4d9ba0d480c8e2f05c9400f310184efc7e40c + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 🂡🂮🂬🂫 + akcj + + + 
\ No newline at end of file
diff --git a/sw/qa/extras/accessibility/unicode.cxx b/sw/qa/extras/accessibility/unicode.cxx
new file mode 100644
index 000000000000..b4b2b5f6fc84
--- /dev/null
+++ b/sw/qa/extras/accessibility/unicode.cxx
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+// NOTE(review): the include targets were lost when this patch was extracted; they are restored
+// from the types used below -- confirm the exact paths against the upstream file.
+#include <com/sun/star/accessibility/AccessibleTextType.hpp>
+#include <com/sun/star/accessibility/XAccessibleText.hpp>
+#include <com/sun/star/lang/IndexOutOfBoundsException.hpp>
+
+#include <test/a11y/swaccessibletestbase.hxx>
+
+using namespace css;
+using namespace accessibility;
+
+// Checks fetching multi-unit characters
+//
+// The first paragraph of unicode.fodf only holds characters outside the BMP.  Every expected
+// segment below therefore spans two offsets (SegmentEnd - SegmentStart == 2): offsets count UTF-16
+// code units, and a CHARACTER segment must cover the whole surrogate pair, never half of it.
+CPPUNIT_TEST_FIXTURE(test::SwAccessibleTestBase, TestUnicodeSP)
+{
+    loadFromSrc(u"/sw/qa/extras/accessibility/testdocuments/unicode.fodf");
+
+    // first accessible child of the document
+    auto xContext = getDocumentAccessibleContext()->getAccessibleChild(0)->getAccessibleContext();
+
+    // the interface type has to be spelled out: it cannot be deduced from the constructor
+    // arguments, and it is what makes the getText*Index() calls below available
+    uno::Reference<XAccessibleText> para(xContext, uno::UNO_QUERY_THROW);
+
+    // character AT offset 0: the first code point, covering offsets [0, 2)
+    auto segment = para->getTextAtIndex(0, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\U0001f0a1"), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), segment.SegmentEnd);
+
+    // character BEFORE offset 2 (the start of the second code point): again the first code point
+    segment = para->getTextBeforeIndex(2, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\U0001f0a1"), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), segment.SegmentEnd);
+
+    // character BEHIND offset 0: the second code point, covering offsets [2, 4)
+    segment = para->getTextBehindIndex(0, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\U0001f0ae"), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), segment.SegmentEnd);
+}
+
+// Checks 
+// getTextBehindIndex() with multi-unit characters
+//
+// Exercises the boundaries of getTextBehindIndex(CHARACTER): out-of-range indexes must throw,
+// an index at (or inside) the last character must yield an empty segment with -1 offsets, and
+// the index of the second-to-last character must yield the whole last character.  The same
+// checks are then repeated on a paragraph of single-unit characters as a baseline.
+CPPUNIT_TEST_FIXTURE(test::SwAccessibleTestBase, TestUnicodeSPBehindIndex)
+{
+    loadFromSrc(u"/sw/qa/extras/accessibility/testdocuments/unicode.fodf");
+
+    auto xContext = getDocumentAccessibleContext()->getAccessibleChild(0)->getAccessibleContext();
+
+    // the interface type has to be spelled out: it cannot be deduced from the constructor
+    // arguments, and it is what makes getCharacterCount()/getTextBehindIndex() available
+    uno::Reference<XAccessibleText> para(xContext, uno::UNO_QUERY_THROW);
+    // the offsets derived from this count below step by 2 per character, i.e. by code units
+    auto nChCount = para->getCharacterCount();
+
+    // verify bounds are properly handled
+    CPPUNIT_ASSERT_THROW(para->getTextBehindIndex(-1, AccessibleTextType::CHARACTER),
+                         lang::IndexOutOfBoundsException);
+    CPPUNIT_ASSERT_THROW(para->getTextBehindIndex(nChCount + 1, AccessibleTextType::CHARACTER),
+                         lang::IndexOutOfBoundsException);
+
+    // index == count is accepted, but there is nothing behind it
+    auto segment = para->getTextBehindIndex(nChCount, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u""), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentEnd);
+
+    // count - 2 is the start of the last (two-unit) character: still nothing behind it
+    segment = para->getTextBehindIndex(nChCount - 2, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u""), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentEnd);
+
+    // count - 4 is the start of the second-to-last character: the last one, at [6, 8), follows
+    segment = para->getTextBehindIndex(nChCount - 4, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\U0001f0ab"), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(8), segment.SegmentEnd);
+
+    // verify bounds behave the same with single unit characters, just as a validation
+    // (presumably the second paragraph of the test document, "akcj" -- the "j" at [3, 4)
+    // expected below matches it)
+    xContext = getNextFlowingSibling(xContext);
+    CPPUNIT_ASSERT(xContext.is());
+    para.set(xContext, uno::UNO_QUERY_THROW);
+
+    nChCount = para->getCharacterCount();
+
+    CPPUNIT_ASSERT_THROW(para->getTextBehindIndex(-1, AccessibleTextType::CHARACTER),
+                         lang::IndexOutOfBoundsException);
+    CPPUNIT_ASSERT_THROW(para->getTextBehindIndex(nChCount + 1, AccessibleTextType::CHARACTER),
+                         lang::IndexOutOfBoundsException);
+
+    // index == count: nothing behind it
+    segment = para->getTextBehindIndex(nChCount, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u""), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentEnd);
+
+    // count - 1 is the last character: nothing behind it
+    segment = para->getTextBehindIndex(nChCount - 1, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u""), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), segment.SegmentEnd);
+
+    // count - 2 is the second-to-last character: the last one, at [3, 4), follows
+    segment = para->getTextBehindIndex(nChCount - 2, AccessibleTextType::CHARACTER);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"j"), segment.SegmentText);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), segment.SegmentStart);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), segment.SegmentEnd);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
-- 
cgit v1.2.3