/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2008 by Sun Microsystems, Inc.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * $RCSfile: ulfconv.cxx,v $
 * $Revision: 1.10 $
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <ctype.h>
|
2006-01-16 05:58:38 -06:00
|
|
|
#include <sal/alloca.h>
|
2004-10-18 08:08:04 -05:00
|
|
|
|
|
|
|
#include <rtl/ustring.hxx>
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <string>
|
|
|
|
|
2006-06-19 21:35:33 -05:00
|
|
|
/*****************************************************************************
|
|
|
|
* typedefs
|
|
|
|
*****************************************************************************/
|
2004-10-18 08:08:04 -05:00
|
|
|
|
|
|
|
typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
|
|
|
|
|
2006-06-19 21:35:33 -05:00
|
|
|
struct _pair {
|
|
|
|
const char *key;
|
|
|
|
rtl_TextEncoding value;
|
|
|
|
};
|
2004-10-18 08:08:04 -05:00
|
|
|
|
|
|
|
static int _pair_compare (const char *key, const _pair *pair);
|
|
|
|
static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
|
|
|
|
|
|
|
|
|
|
|
|
/* Code page identifiers (as they appear in the second column of the encoding
 * table file) and the text encoding each one selects.
 *
 * The table is searched with _pair_search(), which is a binary search, so
 * the entries have to stay in ascending string order. Keys are compared
 * character by character, not numerically, which is why "1258" comes
 * before "874". */
const _pair _ms_encoding_list[] = {
    { "0",    RTL_TEXTENCODING_UTF8    },
    { "1250", RTL_TEXTENCODING_MS_1250 },
    { "1251", RTL_TEXTENCODING_MS_1251 },
    { "1252", RTL_TEXTENCODING_MS_1252 },
    { "1253", RTL_TEXTENCODING_MS_1253 },
    { "1254", RTL_TEXTENCODING_MS_1254 },
    { "1255", RTL_TEXTENCODING_MS_1255 },
    { "1256", RTL_TEXTENCODING_MS_1256 },
    { "1257", RTL_TEXTENCODING_MS_1257 },
    { "1258", RTL_TEXTENCODING_MS_1258 },
    { "874",  RTL_TEXTENCODING_MS_874  },
    { "932",  RTL_TEXTENCODING_MS_932  },
    { "936",  RTL_TEXTENCODING_MS_936  },
    { "949",  RTL_TEXTENCODING_MS_949  },
    { "950",  RTL_TEXTENCODING_MS_950  }
};
|
|
|
|
|
|
|
|
|
2006-06-19 21:35:33 -05:00
|
|
|
/*****************************************************************************
 * fgets that work with unix line ends on Windows
 *
 * Reads characters from fp into s until a '\n' has been stored, n-1
 * characters have been stored, or getc() reports EOF. The '\n' is kept.
 *
 * Returns s (NUL terminated) if at least one character was stored,
 * otherwise NULL; s is left untouched in that case.
 *****************************************************************************/

char * my_fgets(char *s, int n, FILE *fp)
{
    int nStored = 0;

    while( nStored < n-1 )
    {
        int ch = getc(fp);
        if( ch == EOF )
            break;

        /* store first, then test the stored character: the line end
           belongs to the returned string */
        s[nStored] = (char) ch;
        if( s[nStored++] == '\n' )
            break;
    }

    if( nStored == 0 )
        return NULL;

    s[nStored] = '\0';
    return s;
}
|
|
|
|
|
2006-06-19 21:35:33 -05:00
|
|
|
/*****************************************************************************
|
|
|
|
* compare function for binary search
|
|
|
|
*****************************************************************************/
|
2004-10-18 08:08:04 -05:00
|
|
|
|
|
|
|
static int
|
|
|
|
_pair_compare (const char *key, const _pair *pair)
|
|
|
|
{
|
|
|
|
int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2006-06-19 21:35:33 -05:00
|
|
|
/*****************************************************************************
|
|
|
|
* binary search on encoding tables
|
|
|
|
*****************************************************************************/
|
2004-10-18 08:08:04 -05:00
|
|
|
|
|
|
|
static const _pair*
|
|
|
|
_pair_search (const char *key, const _pair *base, unsigned int member )
|
|
|
|
{
|
|
|
|
unsigned int lower = 0;
|
|
|
|
unsigned int upper = member;
|
|
|
|
unsigned int current;
|
|
|
|
int comparison;
|
|
|
|
|
|
|
|
/* check for validity of input */
|
|
|
|
if ( (key == NULL) || (base == NULL) || (member == 0) )
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* binary search */
|
|
|
|
while ( lower < upper )
|
|
|
|
{
|
|
|
|
current = (lower + upper) / 2;
|
|
|
|
comparison = _pair_compare( key, base + current );
|
|
|
|
if (comparison < 0)
|
|
|
|
upper = current;
|
|
|
|
else
|
|
|
|
if (comparison > 0)
|
|
|
|
lower = current + 1;
|
|
|
|
else
|
|
|
|
return base + current;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* read_encoding_table
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
void read_encoding_table(char * file, EncodingMap& aEncodingMap)
|
|
|
|
{
|
|
|
|
FILE * fp = fopen(file, "r");
|
|
|
|
if ( ! fp ) {
|
|
|
|
fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
|
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
char buffer[512];
|
|
|
|
while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
|
|
|
|
|
|
|
|
// strip comment lines
|
|
|
|
if ( buffer[0] == '#' )
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// find end of language string
|
|
|
|
char * cp;
|
|
|
|
for ( cp = buffer; ! isspace(*cp); cp++ )
|
|
|
|
;
|
|
|
|
*cp = '\0';
|
|
|
|
|
|
|
|
// find start of codepage string
|
|
|
|
for ( ++cp; isspace(*cp); ++cp )
|
|
|
|
;
|
|
|
|
char * codepage = cp;
|
|
|
|
|
|
|
|
// find end of codepage string
|
|
|
|
for ( ++cp; ! isspace(*cp); ++cp )
|
|
|
|
;
|
|
|
|
*cp = '\0';
|
|
|
|
|
|
|
|
// find the correct mapping for codepage
|
|
|
|
const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair );
|
|
|
|
const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
|
|
|
|
|
|
|
|
if ( encoding != NULL ) {
|
|
|
|
const std::string language(buffer);
|
|
|
|
aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* print_legacy_mixed
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
void print_legacy_mixed(
|
|
|
|
FILE * ostream,
|
|
|
|
const rtl::OUString& aString,
|
|
|
|
const std::string& language,
|
|
|
|
EncodingMap& aEncodingMap)
|
|
|
|
{
|
|
|
|
EncodingMap::iterator iter = aEncodingMap.find(language);
|
|
|
|
|
|
|
|
if ( iter != aEncodingMap.end() ) {
|
|
|
|
fputs(OUStringToOString(aString, iter->second).getStr(), ostream);
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* print_java_style
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
void print_java_style(FILE * ostream, const rtl::OUString& aString)
|
|
|
|
{
|
|
|
|
int imax = aString.getLength();
|
|
|
|
for (int i = 0; i < imax; i++) {
|
|
|
|
sal_Unicode uc = aString[i];
|
|
|
|
if ( uc < 128 ) {
|
|
|
|
fprintf(ostream, "%c", (char) uc);
|
|
|
|
} else {
|
|
|
|
fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* main
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
int main( int argc, char * const argv[] )
|
|
|
|
{
|
|
|
|
EncodingMap aEncodingMap;
|
|
|
|
|
|
|
|
FILE *istream = stdin;
|
|
|
|
FILE *ostream = stdout;
|
|
|
|
|
2005-01-21 04:35:34 -06:00
|
|
|
char *outfile = NULL;
|
|
|
|
|
2004-10-18 08:08:04 -05:00
|
|
|
int errflg = 0;
|
|
|
|
int argi;
|
|
|
|
|
|
|
|
for( argi=1; argi < argc; argi++ )
|
|
|
|
{
|
|
|
|
if( argv[argi][0] == '-' && argv[argi][2] == '\0' )
|
|
|
|
{
|
|
|
|
switch(argv[argi][1]) {
|
|
|
|
case 'o':
|
|
|
|
if (argi+1 >= argc || argv[argi+1][0] == '-')
|
|
|
|
{
|
2006-07-19 10:08:39 -05:00
|
|
|
fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
|
2004-10-18 08:08:04 -05:00
|
|
|
errflg++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
++argi;
|
2005-01-21 04:35:34 -06:00
|
|
|
outfile = argv[argi];
|
2004-10-18 08:08:04 -05:00
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
if (argi+1 >= argc || argv[argi+1][0] == '-')
|
|
|
|
{
|
|
|
|
fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
|
|
|
|
errflg++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
read_encoding_table(argv[++argi], aEncodingMap);
|
|
|
|
break;
|
|
|
|
default:
|
2006-07-19 10:08:39 -05:00
|
|
|
fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
|
2004-10-18 08:08:04 -05:00
|
|
|
errflg++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (errflg) {
|
|
|
|
fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
|
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* assign input file to stdin */
|
2005-01-21 04:35:34 -06:00
|
|
|
if ( argi < argc )
|
|
|
|
{
|
2004-10-18 08:08:04 -05:00
|
|
|
istream = fopen(argv[argi], "r");
|
|
|
|
if ( istream == NULL ) {
|
|
|
|
fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
|
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-01-21 04:35:34 -06:00
|
|
|
/* open output file if any */
|
|
|
|
if ( outfile )
|
|
|
|
{
|
|
|
|
ostream = fopen(outfile, "w");
|
|
|
|
if ( ostream == NULL ) {
|
2006-11-08 04:59:33 -06:00
|
|
|
fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
|
2005-01-21 04:35:34 -06:00
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-10-18 08:08:04 -05:00
|
|
|
/* read line by line from stdin */
|
2004-11-18 01:21:15 -06:00
|
|
|
char buffer[65536];
|
2004-10-18 08:08:04 -05:00
|
|
|
while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
|
|
|
|
|
|
|
|
/* only handle lines containing " = " */
|
|
|
|
char * cp = strstr(buffer, " = \"");
|
|
|
|
if ( cp ) {
|
|
|
|
rtl::OUString aString;
|
|
|
|
|
|
|
|
/* find end of lang string */
|
|
|
|
int n;
|
|
|
|
for ( n=0; ! isspace(buffer[n]); n++ )
|
|
|
|
;
|
|
|
|
|
|
|
|
std::string line = buffer;
|
|
|
|
std::string lang(line, 0, n);
|
|
|
|
|
|
|
|
cp += 4;
|
|
|
|
rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp,
|
|
|
|
RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
|
|
|
|
|
|
|
|
fprintf(ostream, "%s = \"", lang.c_str());
|
|
|
|
|
|
|
|
if ( aEncodingMap.empty() ) {
|
|
|
|
print_java_style(ostream, aString);
|
|
|
|
} else {
|
|
|
|
print_legacy_mixed(ostream, aString, lang, aEncodingMap);
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(ostream, "\"\n");
|
|
|
|
|
|
|
|
|
|
|
|
} else {
|
2009-02-18 04:41:06 -06:00
|
|
|
fputs(buffer, ostream);
|
2004-10-18 08:08:04 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|