475 lines
14 KiB
C++
475 lines
14 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*************************************************************************
|
|
*
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* Copyright 2000, 2010 Oracle and/or its affiliates.
|
|
*
|
|
* OpenOffice.org - a multi-platform office productivity suite
|
|
*
|
|
* This file is part of OpenOffice.org.
|
|
*
|
|
* OpenOffice.org is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License version 3
|
|
* only, as published by the Free Software Foundation.
|
|
*
|
|
* OpenOffice.org is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License version 3 for more details
|
|
* (a copy is included in the LICENSE file that accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* version 3 along with OpenOffice.org. If not, see
|
|
* <http://www.openoffice.org/license.html>
|
|
* for a copy of the LGPLv3 License.
|
|
*
|
|
************************************************************************/
|
|
|
|
// MARKER(update_precomp.py): autogen include statement, do not remove
|
|
#include "precompiled_ucb.hxx"
|
|
#include <regexp.hxx>
|
|
|
|
#include <cstddef>
|
|
|
|
#include "osl/diagnose.h"
|
|
#include <com/sun/star/lang/IllegalArgumentException.hpp>
|
|
#include <rtl/ustrbuf.hxx>
|
|
#include <rtl/ustring.hxx>
|
|
|
|
namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
|
|
// unnamed namespaces don't work well yet...
|
|
|
|
using namespace com::sun::star;
|
|
using namespace ucb_impl;
|
|
|
|
//============================================================================
|
|
//
|
|
// Regexp
|
|
//
|
|
//============================================================================
|
|
|
|
inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix,
|
|
bool bTheEmptyDomain, rtl::OUString const & rTheInfix,
|
|
bool bTheTranslation,
|
|
rtl::OUString const & rTheReversePrefix):
|
|
m_eKind(eTheKind),
|
|
m_aPrefix(rThePrefix),
|
|
m_aInfix(rTheInfix),
|
|
m_aReversePrefix(rTheReversePrefix),
|
|
m_bEmptyDomain(bTheEmptyDomain),
|
|
m_bTranslation(bTheTranslation)
|
|
{
|
|
OSL_ASSERT(m_eKind == KIND_DOMAIN
|
|
|| (!m_bEmptyDomain && m_aInfix.getLength() == 0));
|
|
OSL_ASSERT(m_bTranslation || m_aReversePrefix.getLength() == 0);
|
|
}
|
|
|
|
//============================================================================
|
|
namespace unnamed_ucb_regexp {
|
|
|
|
bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
|
|
sal_Unicode const * pEnd,
|
|
rtl::OUString const & rString)
|
|
{
|
|
sal_Unicode const * p = *pBegin;
|
|
|
|
sal_Unicode const * q = rString.getStr();
|
|
sal_Unicode const * qEnd = q + rString.getLength();
|
|
|
|
if (pEnd - p < qEnd - q)
|
|
return false;
|
|
|
|
while (q != qEnd)
|
|
{
|
|
sal_Unicode c1 = *p++;
|
|
sal_Unicode c2 = *q++;
|
|
if (c1 >= 'a' && c1 <= 'z')
|
|
c1 -= 'a' - 'A';
|
|
if (c2 >= 'a' && c2 <= 'z')
|
|
c2 -= 'a' - 'A';
|
|
if (c1 != c2)
|
|
return false;
|
|
}
|
|
|
|
*pBegin = p;
|
|
return true;
|
|
}
|
|
|
|
}
|
|
|
|
bool Regexp::matches(rtl::OUString const & rString,
|
|
rtl::OUString * pTranslation, bool * pTranslated) const
|
|
{
|
|
sal_Unicode const * pBegin = rString.getStr();
|
|
sal_Unicode const * pEnd = pBegin + rString.getLength();
|
|
|
|
bool bMatches = false;
|
|
|
|
sal_Unicode const * p = pBegin;
|
|
if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
|
|
{
|
|
sal_Unicode const * pBlock1Begin = p;
|
|
sal_Unicode const * pBlock1End = pEnd;
|
|
|
|
sal_Unicode const * pBlock2Begin = 0;
|
|
sal_Unicode const * pBlock2End = 0;
|
|
|
|
switch (m_eKind)
|
|
{
|
|
case KIND_PREFIX:
|
|
bMatches = true;
|
|
break;
|
|
|
|
case KIND_AUTHORITY:
|
|
bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
|
|
break;
|
|
|
|
case KIND_DOMAIN:
|
|
if (!m_bEmptyDomain)
|
|
{
|
|
if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
|
|
break;
|
|
++p;
|
|
}
|
|
for (;;)
|
|
{
|
|
sal_Unicode const * q = p;
|
|
if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
|
|
&& (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
|
|
{
|
|
bMatches = true;
|
|
pBlock1End = p;
|
|
pBlock2Begin = q;
|
|
pBlock2End = pEnd;
|
|
break;
|
|
}
|
|
|
|
if (p == pEnd)
|
|
break;
|
|
|
|
sal_Unicode c = *p++;
|
|
if (c == '/' || c == '?' || c == '#')
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (bMatches)
|
|
{
|
|
if (m_bTranslation)
|
|
{
|
|
if (pTranslation)
|
|
{
|
|
rtl::OUStringBuffer aBuffer(m_aReversePrefix);
|
|
aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin);
|
|
aBuffer.append(m_aInfix);
|
|
aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin);
|
|
*pTranslation = aBuffer.makeStringAndClear();
|
|
}
|
|
if (pTranslated)
|
|
*pTranslated = true;
|
|
}
|
|
else
|
|
{
|
|
if (pTranslation)
|
|
*pTranslation = rString;
|
|
if (pTranslated)
|
|
*pTranslated = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return bMatches;
|
|
}
|
|
|
|
//============================================================================
|
|
namespace unnamed_ucb_regexp {
|
|
|
|
inline bool isAlpha(sal_Unicode c)
|
|
{
|
|
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
|
}
|
|
|
|
inline bool isDigit(sal_Unicode c)
|
|
{
|
|
return c >= '0' && c <= '9';
|
|
}
|
|
|
|
bool isScheme(rtl::OUString const & rString, bool bColon)
|
|
{
|
|
// Return true if rString matches <scheme> (plus a trailing ":" if bColon
|
|
// is true) from RFC 2396:
|
|
sal_Unicode const * p = rString.getStr();
|
|
sal_Unicode const * pEnd = p + rString.getLength();
|
|
if (p != pEnd && isAlpha(*p))
|
|
for (++p;;)
|
|
{
|
|
if (p == pEnd)
|
|
return !bColon;
|
|
sal_Unicode c = *p++;
|
|
if (!(isAlpha(c) || isDigit(c)
|
|
|| c == '+' || c == '-' || c == '.'))
|
|
return bColon && c == ':' && p == pEnd;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void appendStringLiteral(rtl::OUStringBuffer * pBuffer,
|
|
rtl::OUString const & rString)
|
|
{
|
|
OSL_ASSERT(pBuffer);
|
|
|
|
pBuffer->append(sal_Unicode('"'));
|
|
sal_Unicode const * p = rString.getStr();
|
|
sal_Unicode const * pEnd = p + rString.getLength();
|
|
while (p != pEnd)
|
|
{
|
|
sal_Unicode c = *p++;
|
|
if (c == '"' || c == '\\')
|
|
pBuffer->append(sal_Unicode('\\'));
|
|
pBuffer->append(c);
|
|
}
|
|
pBuffer->append(sal_Unicode('"'));
|
|
}
|
|
|
|
}
|
|
|
|
rtl::OUString Regexp::getRegexp(bool bReverse) const
|
|
{
|
|
if (m_bTranslation)
|
|
{
|
|
rtl::OUStringBuffer aBuffer;
|
|
if (bReverse)
|
|
{
|
|
if (m_aReversePrefix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aReversePrefix);
|
|
}
|
|
else
|
|
{
|
|
if (m_aPrefix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aPrefix);
|
|
}
|
|
switch (m_eKind)
|
|
{
|
|
case KIND_PREFIX:
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
|
|
break;
|
|
|
|
case KIND_AUTHORITY:
|
|
aBuffer.
|
|
appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
|
|
break;
|
|
|
|
case KIND_DOMAIN:
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
|
|
aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
|
|
if (m_aInfix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aInfix);
|
|
aBuffer.
|
|
appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
|
|
break;
|
|
}
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
|
|
if (bReverse)
|
|
{
|
|
if (m_aPrefix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aPrefix);
|
|
}
|
|
else
|
|
{
|
|
if (m_aReversePrefix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aReversePrefix);
|
|
}
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
|
|
return aBuffer.makeStringAndClear();
|
|
}
|
|
else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
|
|
return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
|
|
else
|
|
{
|
|
rtl::OUStringBuffer aBuffer;
|
|
if (m_aPrefix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aPrefix);
|
|
switch (m_eKind)
|
|
{
|
|
case KIND_PREFIX:
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
|
|
break;
|
|
|
|
case KIND_AUTHORITY:
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
|
|
break;
|
|
|
|
case KIND_DOMAIN:
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
|
|
aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
|
|
if (m_aInfix.getLength() != 0)
|
|
appendStringLiteral(&aBuffer, m_aInfix);
|
|
aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
|
|
break;
|
|
}
|
|
return aBuffer.makeStringAndClear();
|
|
}
|
|
}
|
|
|
|
//============================================================================
|
|
namespace unnamed_ucb_regexp {
|
|
|
|
bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
|
|
sal_Char const * pString, size_t nStringLength)
|
|
{
|
|
sal_Unicode const * p = *pBegin;
|
|
|
|
sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString);
|
|
sal_uChar const * qEnd = q + nStringLength;
|
|
|
|
if (pEnd - p < qEnd - q)
|
|
return false;
|
|
|
|
while (q != qEnd)
|
|
{
|
|
sal_Unicode c1 = *p++;
|
|
sal_Unicode c2 = *q++;
|
|
if (c1 != c2)
|
|
return false;
|
|
}
|
|
|
|
*pBegin = p;
|
|
return true;
|
|
}
|
|
|
|
bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
|
|
rtl::OUString * pString)
|
|
{
|
|
sal_Unicode const * p = *pBegin;
|
|
|
|
if (p == pEnd || *p++ != '"')
|
|
return false;
|
|
|
|
rtl::OUStringBuffer aBuffer;
|
|
for (;;)
|
|
{
|
|
if (p == pEnd)
|
|
return false;
|
|
sal_Unicode c = *p++;
|
|
if (c == '"')
|
|
break;
|
|
if (c == '\\')
|
|
{
|
|
if (p == pEnd)
|
|
return false;
|
|
c = *p++;
|
|
if (c != '"' && c != '\\')
|
|
return false;
|
|
}
|
|
aBuffer.append(c);
|
|
}
|
|
|
|
*pBegin = p;
|
|
*pString = aBuffer.makeStringAndClear();
|
|
return true;
|
|
}
|
|
|
|
}
|
|
|
|
Regexp Regexp::parse(rtl::OUString const & rRegexp)
|
|
{
|
|
// Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
|
|
// where <scheme> is as defined in RFC 2396:
|
|
if (isScheme(rRegexp, false))
|
|
return Regexp(Regexp::KIND_PREFIX,
|
|
rRegexp
|
|
+ rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
|
|
false,
|
|
rtl::OUString(),
|
|
false,
|
|
rtl::OUString());
|
|
|
|
sal_Unicode const * p = rRegexp.getStr();
|
|
sal_Unicode const * pEnd = p + rRegexp.getLength();
|
|
|
|
rtl::OUString aPrefix;
|
|
scanStringLiteral(&p, pEnd, &aPrefix);
|
|
|
|
if (p == pEnd)
|
|
throw lang::IllegalArgumentException();
|
|
|
|
if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
|
|
{
|
|
if (p != pEnd)
|
|
throw lang::IllegalArgumentException();
|
|
|
|
return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
|
|
false, rtl::OUString());
|
|
}
|
|
else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
|
|
{
|
|
rtl::OUString aReversePrefix;
|
|
scanStringLiteral(&p, pEnd, &aReversePrefix);
|
|
|
|
if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
|
|
|| p != pEnd)
|
|
throw lang::IllegalArgumentException();
|
|
|
|
return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
|
|
true, aReversePrefix);
|
|
}
|
|
else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
|
|
{
|
|
if (p != pEnd)
|
|
throw lang::IllegalArgumentException();
|
|
|
|
return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
|
|
false, rtl::OUString());
|
|
}
|
|
else if (matchString(&p, pEnd,
|
|
RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
|
|
{
|
|
rtl::OUString aReversePrefix;
|
|
if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
|
|
&& matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
|
|
&& p == pEnd))
|
|
throw lang::IllegalArgumentException();
|
|
|
|
return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
|
|
true, aReversePrefix);
|
|
}
|
|
else
|
|
{
|
|
bool bOpen = false;
|
|
if (p != pEnd && *p == '(')
|
|
{
|
|
++p;
|
|
bOpen = true;
|
|
}
|
|
|
|
if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
|
|
throw lang::IllegalArgumentException();
|
|
|
|
if (p == pEnd || (*p != '*' && *p != '+'))
|
|
throw lang::IllegalArgumentException();
|
|
bool bEmptyDomain = *p++ == '*';
|
|
|
|
rtl::OUString aInfix;
|
|
scanStringLiteral(&p, pEnd, &aInfix);
|
|
|
|
if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
|
|
throw lang::IllegalArgumentException();
|
|
|
|
rtl::OUString aReversePrefix;
|
|
if (bOpen
|
|
&& !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
|
|
&& scanStringLiteral(&p, pEnd, &aReversePrefix)
|
|
&& matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
|
|
throw lang::IllegalArgumentException();
|
|
|
|
if (p != pEnd)
|
|
throw lang::IllegalArgumentException();
|
|
|
|
return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
|
|
bOpen, aReversePrefix);
|
|
}
|
|
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|