diff options
author | Eike Rathke <erack@redhat.com> | 2019-05-29 21:08:38 +0200 |
---|---|---|
committer | Aron Budea <aron.budea@collabora.com> | 2019-06-04 16:35:57 +0200 |
commit | 70f872a211a296ef6b45828e0fe92cd752a89bb3 (patch) | |
tree | 13c7a4559214dcf40d475d1a85e6be51a0c06c0d /sax | |
parent | 36ce4fff0f96d711d185b0da60ed3d71e3a10d7a (diff) |
Resolves: tdf#125279 do not double _x005F_ escapement
OOXML _x005F_ escaped content may not get unescaped when read, so
when writing back to OOXML do not attempt to escape it again, i.e.
write _x005F_xHHHH_ as is and not as _x005F_x005F_xHHHH_.
This is more of a workaround; the proper fix would be to unescape
_x005F_ content upon read. But then the entire rat tail of "invalid
XML character" escapement and control character handling would
come into play.
Change-Id: I3d31dc84a362753c23a8c89f7a5d7bfd06e4367b
Reviewed-on: https://gerrit.libreoffice.org/73187
Tested-by: Jenkins
Reviewed-by: Eike Rathke <erack@redhat.com>
(cherry picked from commit f677885fec59f252f36673ee4d8c0b4863625a4d)
Diffstat (limited to 'sax')
-rw-r--r-- | sax/source/tools/fastserializer.cxx | 43 |
1 files changed, 33 insertions, 10 deletions
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx index a0ae255b36eb..87b23b45e17c 100644 --- a/sax/source/tools/fastserializer.cxx +++ b/sax/source/tools/fastserializer.cxx @@ -199,6 +199,7 @@ namespace sax_fastparser { default: if (mbXescape) { + char c1, c2, c3, c4; // Escape characters not valid in XML 1.0 as // _xHHHH_. A literal "_xHHHH_" has to be // escaped as _x005F_xHHHH_ (effectively @@ -209,22 +210,44 @@ namespace sax_fastparser { if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen && pStr[i+6] == '_' && ((pStr[i+1] | 0x20) == 'x') && - isHexDigit( pStr[i+2] ) && - isHexDigit( pStr[i+3] ) && - isHexDigit( pStr[i+4] ) && - isHexDigit( pStr[i+5] )) + isHexDigit( c1 = pStr[i+2] ) && + isHexDigit( c2 = pStr[i+3] ) && + isHexDigit( c3 = pStr[i+4] ) && + isHexDigit( c4 = pStr[i+5] )) { // OOXML has the odd habit to write some // names using this that when re-saving // should *not* be escaped, specifically // _x0020_ for blanks in w:xpath values. - if (strncmp( pStr+i+2, "0020", 4) != 0) + if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0')) { - writeBytes( "_x005F_", kXescapeLen); - // Remember this escapement so in - // _xHHHH_xHHHH_ only the first '_' is - // escaped. - nNextXescape = i + kXescapeLen; + // When encountering "_x005F_xHHHH_" + // assume that is an already escaped + // sequence that was not unescaped and + // shall be written as is, to not end + // up with "_x005F_x005F_xHHHH_" and + // repeated.. + if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' && + i + kXescapeLen <= nLen - 6 && + pStr[i+kXescapeLen+5] == '_' && + ((pStr[i+kXescapeLen+0] | 0x20) == 'x') && + isHexDigit( pStr[i+kXescapeLen+1] ) && + isHexDigit( pStr[i+kXescapeLen+2] ) && + isHexDigit( pStr[i+kXescapeLen+3] ) && + isHexDigit( pStr[i+kXescapeLen+4] )) + { + writeBytes( &c, 1 ); + // Remember this fake escapement. 
+ nNextXescape = i + kXescapeLen + 6; + } + else + { + writeBytes( "_x005F_", kXescapeLen); + // Remember this escapement so in + // _xHHHH_xHHHH_ only the first '_' + // is escaped. + nNextXescape = i + kXescapeLen; + } break; } } |