a85a91a7d9
2008/06/15 11:51:30 erack 1.6.20.2: check if CurrencyID resembles ISO 4217 code and equals BankSymbol 2008/06/14 18:46:37 erack 1.6.20.1: continue lookup for compatible symbol
318 lines
9 KiB
Awk
318 lines
9 KiB
Awk
#!/usr/bin/gawk -f
|
|
# Usage: gawk -f currency-check.awk *.xml
|
|
# Check any
|
|
# <FormatCode>...[$xxx-...]...</FormatCode>
|
|
# against every
|
|
# <CurrencySymbol>xxx</CurrencySymbol>
|
|
# definition of the same XML file and output symbols if no match was found.
|
|
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
|
|
# checked if the used currency symbol is the usedInCompatibleFormatCodes
|
|
# currency symbol as it is needed by the number formatter.
|
|
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
|
|
# is used instead of a real currency symbol.
|
|
# Author: Eike Rathke <er@openoffice.org>
|
|
|
|
# No input file is current yet. The file-switch rule compares this variable
# against FILENAME to notice the first and every following input file.
BEGIN { file = "" }
|
|
|
|
|
|
# Fires on the first record of each input file: run the checks for the file
# that was just completed (if there was one), then reset all per-file state
# before the new file is scanned.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # The counters alone are not enough: a <Currency> element that lacks
    # e.g. its <BankSymbol> line would otherwise silently inherit the value
    # stored at the same index by a previously processed file.
    delete FormatLine
    delete FormatAuto
    delete Formats
    delete SymbolDefault
    delete SymbolCompati
    delete IDLine
    delete IDs
    delete SymbolLine
    delete Symbols
    delete BankSymbolLine
    delete BankSymbols
}
|
|
|
|
# Main per-record rule. Counts lines, rejects DOS line endings and then
# dispatches on the kind of element found on the line:
#   <LC_FORMAT>     remember the replaceFrom/replaceTo currency placeholder
#   <FormatElement> note whether a CURRENCY format has one of the
#                   formatindex values 12, 13, 14, 15 or 17
#   <FormatCode>    collect the currency symbols used in [$xxx-...] codes
#   <LC_CURRENCY>   if it has a ref="locale" attribute, read the currency
#                   definitions from locale.xml instead
#   anything else   hand the line to getCurrencyParams()
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks, so files with
    # DOS line endings are rejected outright.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks exactly like the literal <FormatCode>
            # pattern below does, otherwise indented format codes that use
            # the placeholder would never be checked.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # 11 characters of replaceTo=" plus the closing quote, plus
                # one for a trailing > and one more for a trailing />.
                l = 12
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                sReplaceTo = substr( $j, 12, length($j)-l )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        # arr[3] is the text between <FormatCode> and </FormatCode>.
        split( $0, arr, /<|>/ )
        # Cut the code at every "[$" and every "-hex]" so that the currency
        # symbols end up as separate pieces.
        split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        for ( j in code )
        {
            # Pieces containing '#', '0' or "[NatNum" are number format
            # parts, not currency symbols.
            if ( code[j] && code[j] !~ /\#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # 5 characters of ref=" plus the closing quote, plus one for
                # a trailing > and one more for a trailing />.
                l = 6
                if ( $j ~ />$/ )
                    ++l
                if ( $j ~ /\/>$/ )
                    ++l
                locale = substr( $j, 6, length($j)-l )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline overwrites $0 and NF; keep the current record so
                # the enclosing loop goes on scanning this line and not the
                # last line of the referenced file.
                savedRecord = $0
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
                $0 = savedRecord
            }
        }
    }
    else
        getCurrencyParams()
}
|
|
|
|
|
|
# The file-switch rule only checks a file once the next one starts, so the
# last input file is checked here. Nothing is checked if no file was read or
# if the run was aborted because of a CrLf line ending (crlf set).
END {
    if ( file )
    {
        if ( !crlf )
            checkIt()
    }
}
|
|
|
|
|
|
# Evaluate the current record ($0) as part of a <Currency> definition and
# store what it contains in the global arrays at index nCurrencies:
#   <Currency ...>   SymbolDefault[], SymbolCompati[] (1 if the attribute
#                    default="true" / usedInCompatibleFormatCodes="true" is
#                    present, else 0)
#   <CurrencyID>     IDs[], IDLine[]
#   <CurrencySymbol> Symbols[], SymbolLine[]
#   <BankSymbol>     BankSymbols[], BankSymbolLine[]
#   </Currency>      increments nCurrencies
# The *Line[] entries hold "file line N", extended by the referencing file
# if sRefCurrencyFromLocale is set.
# Assumes that each element is on a line on its own!
# Callers pass no arguments; parts and where are listed as parameters only
# to make them local, so that the global arr of the main rule is not touched.
function getCurrencyParams(    parts, where ) {
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        SymbolDefault[nCurrencies] = ( $0 ~ /default="true"/ ) ? 1 : 0
        SymbolCompati[nCurrencies] = ( $0 ~ /usedInCompatibleFormatCodes="true"/ ) ? 1 : 0
        return
    }

    # Where the current line came from, for use in the reports.
    where = file " line " line
    if ( sRefCurrencyFromLocale )
        where = where " (referenced from " sRefCurrencyFromLocale ")"

    if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # parts[3] is the text between the opening and the closing tag.
        split( $0, parts, /<|>/ )
        IDLine[nCurrencies] = where
        IDs[nCurrencies] = parts[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, parts, /<|>/ )
        SymbolLine[nCurrencies] = where
        Symbols[nCurrencies] = parts[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, parts, /<|>/ )
        BankSymbolLine[nCurrencies] = where
        BankSymbols[nCurrencies] = parts[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}
|
|
|
|
|
|
# Report the findings for the file named by the global variable file, using
# the data collected in Formats[]/FormatLine[]/FormatAuto[] (nFormats
# entries) and in the currency arrays (nCurrencies entries):
#  - every currency symbol used in a format code must be defined, must not
#    be the generic currency symbol, and in the "auto" formats must be the
#    usedInCompatibleFormatCodes one;
#  - if any format was bad, all defined symbols are listed for reference,
#    otherwise the definitions are checked for the generic symbol and for
#    exactly one default and one compatible currency;
#  - every CurrencyID must look like an ISO 4217 code and equal BankSymbol.
# Prints one line per finding and an empty line after a file with findings.
# Callers pass no arguments; the parameters only serve as local variables so
# that e.g. the loop index j of the main rule is not clobbered.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati ) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        # 0: symbol unknown, 1: defined, 2: defined as the compatible one.
        state = FormatInSymbol( Formats[j] )
        # "\xc2\xa4" is the UTF-8 encoded generic currency symbol.
        if ( Formats[j] == "\xc2\xa4" )
        {
            bad = 1
            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # List every defined symbol to ease finding the culprit.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
        }
    }
    else
    {
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print " no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print " no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents. The file
        # is matched by its basename so the exception also holds if the
        # script is given path names.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}
|
|
|
|
|
|
# Look up a currency symbol used in a format code among the nCurrencies
# symbols defined in Symbols[].
#   format  the symbol to look for
# Returns 0 if the symbol is not defined at all, 1 if it is defined but not
# by a currency flagged in SymbolCompati[], and 2 if a currency flagged in
# SymbolCompati[] defines it.
# state and nSym are local variables, callers pass only the format.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format != Symbols[nSym] )
            continue
        # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
        # for AZM and AZN), continue to lookup if the match isn't the
        # compatible one.
        if ( SymbolCompati[nSym] )
            return 2
        state = 1
    }
    return state
}
|
|
|
|
# vim: ts=4 sw=4 expandtab
|