Resolves: tdf#125279 do not double _x005F_ escapement
Content read from OOXML that is _x005F_ escaped may not get unescaped, so when writing back to OOXML do not attempt to escape it again, i.e. write _x005F_xHHHH_ as is and not as _x005F_x005F_xHHHH_. This is more of a workaround; the proper fix would be to unescape _x005F_ content upon read. But then the entire rat's tail of "invalid XML character" escapement and control character handling would come into play. Change-Id: I3d31dc84a362753c23a8c89f7a5d7bfd06e4367b Reviewed-on: https://gerrit.libreoffice.org/73187 Tested-by: Jenkins Reviewed-by: Eike Rathke <erack@redhat.com>
This commit is contained in:
parent
63c4e82c91
commit
f677885fec
1 changed file with 33 additions and 10 deletions
|
@ -199,6 +199,7 @@ namespace sax_fastparser {
|
|||
default:
|
||||
if (mbXescape)
|
||||
{
|
||||
char c1, c2, c3, c4;
|
||||
// Escape characters not valid in XML 1.0 as
|
||||
// _xHHHH_. A literal "_xHHHH_" has to be
|
||||
// escaped as _x005F_xHHHH_ (effectively
|
||||
|
@ -209,22 +210,44 @@ namespace sax_fastparser {
|
|||
if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen &&
|
||||
pStr[i+6] == '_' &&
|
||||
((pStr[i+1] | 0x20) == 'x') &&
|
||||
isHexDigit( pStr[i+2] ) &&
|
||||
isHexDigit( pStr[i+3] ) &&
|
||||
isHexDigit( pStr[i+4] ) &&
|
||||
isHexDigit( pStr[i+5] ))
|
||||
isHexDigit( c1 = pStr[i+2] ) &&
|
||||
isHexDigit( c2 = pStr[i+3] ) &&
|
||||
isHexDigit( c3 = pStr[i+4] ) &&
|
||||
isHexDigit( c4 = pStr[i+5] ))
|
||||
{
|
||||
// OOXML has the odd habit to write some
|
||||
// names using this that when re-saving
|
||||
// should *not* be escaped, specifically
|
||||
// _x0020_ for blanks in w:xpath values.
|
||||
if (strncmp( pStr+i+2, "0020", 4) != 0)
|
||||
if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
|
||||
{
|
||||
writeBytes( "_x005F_", kXescapeLen);
|
||||
// Remember this escapement so in
|
||||
// _xHHHH_xHHHH_ only the first '_' is
|
||||
// escaped.
|
||||
nNextXescape = i + kXescapeLen;
|
||||
// When encountering "_x005F_xHHHH_"
|
||||
// assume that is an already escaped
|
||||
// sequence that was not unescaped and
|
||||
// shall be written as is, to not end
|
||||
// up with "_x005F_x005F_xHHHH_" and
|
||||
// repeated..
|
||||
if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
|
||||
i + kXescapeLen <= nLen - 6 &&
|
||||
pStr[i+kXescapeLen+5] == '_' &&
|
||||
((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
|
||||
isHexDigit( pStr[i+kXescapeLen+1] ) &&
|
||||
isHexDigit( pStr[i+kXescapeLen+2] ) &&
|
||||
isHexDigit( pStr[i+kXescapeLen+3] ) &&
|
||||
isHexDigit( pStr[i+kXescapeLen+4] ))
|
||||
{
|
||||
writeBytes( &c, 1 );
|
||||
// Remember this fake escapement.
|
||||
nNextXescape = i + kXescapeLen + 6;
|
||||
}
|
||||
else
|
||||
{
|
||||
writeBytes( "_x005F_", kXescapeLen);
|
||||
// Remember this escapement so in
|
||||
// _xHHHH_xHHHH_ only the first '_'
|
||||
// is escaped.
|
||||
nNextXescape = i + kXescapeLen;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue