diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2022-01-15 11:33:10 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2022-01-15 10:52:56 +0100 |
commit | 6b973753d407d66dfa5fda86547246c486ab7087 (patch) | |
tree | 72534f1829bfd5f4d7b72de7ee0368dc44b785ff /tools | |
parent | be27b6f0bb73128ad4970fc5649c93d546822a84 (diff) |
tdf#146754: treat xyz:123 as host:port when parsing URLs in smart mode
... rather than as scheme and path.
Change-Id: I9a48310b585b8fa3e31635f877a91f1560b065f0
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/128457
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/qa/cppunit/test_urlobj.cxx | 27 | ||||
-rw-r--r-- | tools/source/fsys/urlobj.cxx | 30 |
2 files changed, 55 insertions, 2 deletions
diff --git a/tools/qa/cppunit/test_urlobj.cxx b/tools/qa/cppunit/test_urlobj.cxx index ec64b5d66777..abcd2fe1417b 100644 --- a/tools/qa/cppunit/test_urlobj.cxx +++ b/tools/qa/cppunit/test_urlobj.cxx @@ -319,6 +319,32 @@ namespace tools_urlobj obj.GetMainURL(INetURLObject::DecodeMechanism::NONE)); } + void testParseSmart() + { + { + // host:port must not be misinterpreted as scheme:path + INetURLObject obj("example.com:8080/foo", INetProtocol::Http); + CPPUNIT_ASSERT(!obj.HasError()); + CPPUNIT_ASSERT_EQUAL(OUString("http://example.com:8080/foo"), + obj.GetMainURL(INetURLObject::DecodeMechanism::NONE)); + CPPUNIT_ASSERT_EQUAL(INetProtocol::Http, obj.GetProtocol()); + CPPUNIT_ASSERT_EQUAL(OUString("example.com"), obj.GetHost()); + CPPUNIT_ASSERT_EQUAL(sal_uInt32(8080), obj.GetPort()); + CPPUNIT_ASSERT_EQUAL(OUString("/foo"), obj.GetURLPath()); + } + { + // port may only contain decimal digits, so this must be treated as unknown scheme + INetURLObject obj("example.com:80a0/foo", INetProtocol::Http); + CPPUNIT_ASSERT(!obj.HasError()); + CPPUNIT_ASSERT_EQUAL(OUString("example.com:80a0/foo"), + obj.GetMainURL(INetURLObject::DecodeMechanism::NONE)); + CPPUNIT_ASSERT_EQUAL(INetProtocol::Generic, obj.GetProtocol()); + CPPUNIT_ASSERT(obj.isSchemeEqualTo(u"example.com")); + CPPUNIT_ASSERT_EQUAL(OUString(""), obj.GetHost()); + CPPUNIT_ASSERT_EQUAL(OUString("80a0/foo"), obj.GetURLPath()); + } + } + // Change the following lines only, if you add, remove or rename // member functions of the current class, // because these macros are need by auto register mechanism. 
@@ -335,6 +361,7 @@ namespace tools_urlobj CPPUNIT_TEST( testSetExtension ); CPPUNIT_TEST( testChangeScheme ); CPPUNIT_TEST( testTd146382 ); + CPPUNIT_TEST( testParseSmart ); CPPUNIT_TEST_SUITE_END( ); }; // class createPool diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx index 49d0500cabb0..1b171ad2ed8e 100644 --- a/tools/source/fsys/urlobj.cxx +++ b/tools/source/fsys/urlobj.cxx @@ -869,8 +869,34 @@ bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef, aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter); if (!aSynScheme.isEmpty()) { - m_eScheme = INetProtocol::Generic; - pPos = p1; + if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1)) + { + // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)" + // branch above); but a known scheme is defined in m_eSmartScheme. If this + // scheme may have a port in authority component, then avoid misinterpreting + // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with + // 123/baz rootless path. For now, do not try to handle possible colons in + // user information, require such ambiguous URLs to have explicit scheme part. + // Also ignore possibility of empty port. + const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme); + if (rInfo.m_bAuthority && rInfo.m_bPort) + { + // Make sure that all characters from colon to [/?#] or to EOL are digits. + // Or maybe make it simple, and just assume that "xyz:1..." is more likely + // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."? + sal_Unicode const* p2 = p1 + 1; + while (p2 != pEnd && rtl::isAsciiDigit(*p2)) + ++p2; + if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#') + m_eScheme = m_eSmartScheme; + } + } + + if (m_eScheme == INetProtocol::NotValid) + { + m_eScheme = INetProtocol::Generic; + pPos = p1; + } } } |