summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorMike Kaganski <mike.kaganski@collabora.com>2022-01-15 11:33:10 +0300
committerMike Kaganski <mike.kaganski@collabora.com>2022-01-15 10:52:56 +0100
commit6b973753d407d66dfa5fda86547246c486ab7087 (patch)
tree72534f1829bfd5f4d7b72de7ee0368dc44b785ff /tools
parentbe27b6f0bb73128ad4970fc5649c93d546822a84 (diff)
tdf#146754: consider xyz:123 as host:port when parsing URLs smart
... rather than scheme: and path. Change-Id: I9a48310b585b8fa3e31635f877a91f1560b065f0 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/128457 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/qa/cppunit/test_urlobj.cxx27
-rw-r--r--tools/source/fsys/urlobj.cxx30
2 files changed, 55 insertions, 2 deletions
diff --git a/tools/qa/cppunit/test_urlobj.cxx b/tools/qa/cppunit/test_urlobj.cxx
index ec64b5d66777..abcd2fe1417b 100644
--- a/tools/qa/cppunit/test_urlobj.cxx
+++ b/tools/qa/cppunit/test_urlobj.cxx
@@ -319,6 +319,32 @@ namespace tools_urlobj
obj.GetMainURL(INetURLObject::DecodeMechanism::NONE));
}
+ void testParseSmart()
+ {
+ {
+ // host:port must not be misinterpreted as scheme:path
+ INetURLObject obj("example.com:8080/foo", INetProtocol::Http);
+ CPPUNIT_ASSERT(!obj.HasError());
+ CPPUNIT_ASSERT_EQUAL(OUString("http://example.com:8080/foo"),
+ obj.GetMainURL(INetURLObject::DecodeMechanism::NONE));
+ CPPUNIT_ASSERT_EQUAL(INetProtocol::Http, obj.GetProtocol());
+ CPPUNIT_ASSERT_EQUAL(OUString("example.com"), obj.GetHost());
+ CPPUNIT_ASSERT_EQUAL(sal_uInt32(8080), obj.GetPort());
+ CPPUNIT_ASSERT_EQUAL(OUString("/foo"), obj.GetURLPath());
+ }
+ {
+ // port may only contain decimal digits, so this must be treated as unknown scheme
+ INetURLObject obj("example.com:80a0/foo", INetProtocol::Http);
+ CPPUNIT_ASSERT(!obj.HasError());
+ CPPUNIT_ASSERT_EQUAL(OUString("example.com:80a0/foo"),
+ obj.GetMainURL(INetURLObject::DecodeMechanism::NONE));
+ CPPUNIT_ASSERT_EQUAL(INetProtocol::Generic, obj.GetProtocol());
+ CPPUNIT_ASSERT(obj.isSchemeEqualTo(u"example.com"));
+ CPPUNIT_ASSERT_EQUAL(OUString(""), obj.GetHost());
+ CPPUNIT_ASSERT_EQUAL(OUString("80a0/foo"), obj.GetURLPath());
+ }
+ }
+
// Change the following lines only, if you add, remove or rename
// member functions of the current class,
// because these macros are need by auto register mechanism.
@@ -335,6 +361,7 @@ namespace tools_urlobj
CPPUNIT_TEST( testSetExtension );
CPPUNIT_TEST( testChangeScheme );
CPPUNIT_TEST( testTd146382 );
+ CPPUNIT_TEST( testParseSmart );
CPPUNIT_TEST_SUITE_END( );
}; // class createPool
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 49d0500cabb0..1b171ad2ed8e 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -869,8 +869,34 @@ bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef,
aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
if (!aSynScheme.isEmpty())
{
- m_eScheme = INetProtocol::Generic;
- pPos = p1;
+ if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1))
+ {
+ // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)"
+ // branch above); but a known scheme is defined in m_eSmartScheme. If this
+ // scheme may have a port in authority component, then avoid misinterpreting
+ // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with
+ // 123/baz rootless path. For now, do not try to handle possible colons in
+ // user information, require such ambiguous URLs to have explicit scheme part.
+ // Also ignore possibility of empty port.
+ const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme);
+ if (rInfo.m_bAuthority && rInfo.m_bPort)
+ {
+ // Make sure that all characters from colon to [/?#] or to EOL are digits.
+ // Or maybe make it simple, and just assume that "xyz:1..." is more likely
+ // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."?
+ sal_Unicode const* p2 = p1 + 1;
+ while (p2 != pEnd && rtl::isAsciiDigit(*p2))
+ ++p2;
+ if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#')
+ m_eScheme = m_eSmartScheme;
+ }
+ }
+
+ if (m_eScheme == INetProtocol::NotValid)
+ {
+ m_eScheme = INetProtocol::Generic;
+ pPos = p1;
+ }
}
}