office-gobmx/i18npool/source/localedata/data/currency-check.awk
Rüdiger Timm a85a91a7d9 INTEGRATION: CWS locdat30 (1.6.20); FILE MERGED
2008/06/15 11:51:30 erack 1.6.20.2: check if CurrencyID resembles ISO 4217 code and equals BankSymbol
2008/06/14 18:46:37 erack 1.6.20.1: continue lookup for compatible symbol
2008-07-02 08:55:46 +00:00

318 lines
9 KiB
Awk

#!/usr/bin/gawk -f
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output symbols if no match was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# checked if the used currency symbol is the usedInCompatibleFormatCodes
# currency symbol as it is needed by the number formatter.
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>
BEGIN {
file = ""
}
file != FILENAME {
if ( file )
checkIt()
file = FILENAME
line = 0
nFormats = 0
nCurrencies = 0
bFormatAuto = 0
sReplaceFrom = ""
sReplaceTo = ""
sMatchReplace = ""
sRefCurrencyFromLocale = ""
crlf = 0
}
{
++line
# If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
# are boo anyways.
if ( /\x0D$/ )
{
print "Error: not Unix line ending in line " line
crlf = 1
exit(1)
}
if ( $1 ~ /^<LC_FORMAT(>|$)/ )
{
if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
{
sReplaceFrom = "\\[CURRENCY\\]"
sMatchReplace = "^<FormatCode>.*" sReplaceFrom
}
for ( j=2; j<=NF; ++j )
{
if ( $j ~ /^replaceTo="/ )
{
l = 12
if ( $j ~ />$/ )
++l
if ( $j ~ /\/>$/ )
++l
sReplaceTo = substr( $j, 12, length($j)-l )
}
}
}
else if ( $1 ~ /^<FormatElement(>|$)/ )
{
if ( $0 ~ /usage="CURRENCY"/ )
{
if ( $0 ~ /formatindex="1[23457]"/ )
bFormatAuto = 1
else
bFormatAuto = 0
}
}
else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
(sMatchReplace && $0 ~ sMatchReplace ) )
{
if ( sReplaceFrom )
gsub( sReplaceFrom, sReplaceTo )
split( $0, arr, /<|>/ )
split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
for ( j in code )
{
if ( code[j] && code[j] !~ /\#|0|\[NatNum/ )
{
FormatLine[nFormats] = file " line " line
FormatAuto[nFormats] = bFormatAuto
Formats[nFormats++] = code[j]
}
}
bFormatAuto = 0
}
else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
{
for ( j=2; j<=NF; ++j )
{
if ( $j ~ /^ref="/ )
{
l = 6
if ( $j ~ />$/ )
++l
if ( $j ~ /\/>$/ )
++l
locale = substr( $j, 6, length($j)-l )
sRefCurrencyFromLocale = file
oldfile = file
oldline = line
file = locale ".xml"
line = 0
while ( (getline <file) > 0 )
{
++line
getCurrencyParams()
}
close( file )
if ( !line )
print "ref locale not available: " file \
" (from " oldfile " line " oldline ")"
file = oldfile
line = oldline
sRefCurrencyFromLocale = ""
}
}
}
else
getCurrencyParams()
}
END {
if ( file && !crlf )
checkIt()
}
function getCurrencyParams() {
# Assumes that each element is on a line on its own!
if ( $1 ~ /^<Currency(>|$)/ )
{
if ( $0 ~ /default="true"/ )
SymbolDefault[nCurrencies] = 1
else
SymbolDefault[nCurrencies] = 0
if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
SymbolCompati[nCurrencies] = 1
else
SymbolCompati[nCurrencies] = 0
}
else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
{
split( $0, arr, /<|>/ )
if ( sRefCurrencyFromLocale )
IDLine[nCurrencies] = file " line " line \
" (referenced from " sRefCurrencyFromLocale ")"
else
IDLine[nCurrencies] = file " line " line
IDs[nCurrencies] = arr[3]
}
else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
{
split( $0, arr, /<|>/ )
if ( sRefCurrencyFromLocale )
SymbolLine[nCurrencies] = file " line " line \
" (referenced from " sRefCurrencyFromLocale ")"
else
SymbolLine[nCurrencies] = file " line " line
Symbols[nCurrencies] = arr[3]
}
else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
{
split( $0, arr, /<|>/ )
if ( sRefCurrencyFromLocale )
BankSymbolLine[nCurrencies] = file " line " line \
" (referenced from " sRefCurrencyFromLocale ")"
else
BankSymbolLine[nCurrencies] = file " line " line
BankSymbols[nCurrencies] = arr[3]
}
else if ( $1 ~ /^<\/Currency>/ )
{
++nCurrencies
}
}
function checkIt() {
bad = 0
for ( j=0; j<nFormats; ++j )
{
state = FormatInSymbol( Formats[j] )
if ( Formats[j] == "\xc2\xa4" )
{
bad = 1
print " bad: `" Formats[j] "' (" FormatLine[j] ")"
}
else if ( state == 0 )
{
bad = 1
print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
}
else if ( FormatAuto[j] && state < 2 )
{
bad = 1
print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
}
}
if ( bad )
{
for ( j=0; j<nCurrencies; ++j )
{
bDef = 0
if ( Symbols[j] == "\xc2\xa4" )
print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
if ( SymbolDefault[j] )
{
bDef = 1
print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
}
if ( SymbolCompati[j] )
{
bDef = 1
print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
}
if ( !bDef )
print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
}
}
else
{
bHasDefault = 0
bHasCompati = 0
for ( j=0; j<nCurrencies; ++j )
{
if ( Symbols[j] == "\xc2\xa4" )
{
bad = 1
print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
}
if ( SymbolDefault[j] )
{
if ( !bHasDefault )
bHasDefault = 1
else
{
bad = 1
print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
}
}
if ( SymbolCompati[j] )
{
if ( !bHasCompati )
bHasCompati = 1
else
{
bad = 1
print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
}
}
}
if ( !bHasDefault )
{
bad = 1
print " no default: (" file ")"
}
if ( !bHasCompati )
{
bad = 1
print " no compati: (" file ")"
}
}
for ( j=0; j<nCurrencies; ++j )
{
# Check if CurrencyID at least resembles some ISO 4217 code.
# The only exception is zh_MO that had an erroneous original data set
# with BankSymbol="P" (stored as ISO code in documents, hence copied to
# CurrencyID now) and needs that entry for legacy documents.
# There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
# lower case except 'a', regardless of IGNORECASE setting, hence this
# ugly notation. [[:upper:]] wouldn't be correct since we want only
# ASCII to match.
if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
&& !(file == "zh_MO.xml" && IDs[j] == "P") )
{
bad = 1
print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
}
# CurrencyID should equal BankSymbol for now.
if ( IDs[j] != BankSymbols[j] )
{
bad = 1
print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
"' (" IDLine[j] " and " BankSymbolLine[j] ")"
}
}
if ( bad )
print ""
}
function FormatInSymbol( format ) {
state = 0
for ( nSym=0; nSym<nCurrencies; ++nSym )
{
if ( format == Symbols[nSym] )
{
# Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
# for AZM and AZN), continue to lookup if the match isn't the
# compatible one.
if ( SymbolCompati[nSym] )
return 2
else
state = 1
}
}
return state
}
# vim: ts=4 sw=4 expandtab