2012-06-28 15:18:55 -05:00
|
|
|
--- misc/mythes-1.2.3.orig/th_gen_idx.pl
|
|
|
|
+++ misc/build/mythes-1.2.3/th_gen_idx.pl
|
2010-05-14 11:02:32 -05:00
|
|
|
@@ -1,11 +1,32 @@
|
|
|
|
-#!/usr/bin/perl
|
|
|
|
-
|
|
|
|
-# perl program to take a thesaurus structured text data file
|
|
|
|
-# and create the proper sorted index file (.idx)
|
|
|
|
+:
|
|
|
|
+eval 'exec perl -wS $0 ${1+"$@"}'
|
|
|
|
+ if 0;
|
|
|
|
+#*************************************************************************
|
|
|
|
+#
|
|
|
|
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
+#
|
|
|
|
+# Copyright 2000, 2010 Oracle and/or its affiliates.
|
|
|
|
+#
|
|
|
|
+# OpenOffice.org - a multi-platform office productivity suite
|
|
|
|
+#
|
|
|
|
+# This file is part of OpenOffice.org.
|
|
|
|
+#
|
|
|
|
+# OpenOffice.org is free software: you can redistribute it and/or modify
|
|
|
|
+# it under the terms of the GNU Lesser General Public License version 3
|
|
|
|
+# only, as published by the Free Software Foundation.
|
|
|
|
+#
|
|
|
|
+# OpenOffice.org is distributed in the hope that it will be useful,
|
|
|
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
+# GNU Lesser General Public License version 3 for more details
|
|
|
|
+# (a copy is included in the LICENSE file that accompanied this code).
|
|
|
|
#
|
2011-12-21 08:37:07 -06:00
|
|
|
-# typically invoked as follows:
|
2010-05-14 11:02:32 -05:00
|
|
|
-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
|
|
|
|
+# You should have received a copy of the GNU Lesser General Public License
|
|
|
|
+# version 3 along with OpenOffice.org. If not, see
|
|
|
|
+# <http://www.openoffice.org/license.html>
|
|
|
|
+# for a copy of the LGPLv3 License.
|
|
|
|
#
|
|
|
|
+#*************************************************************************
|
|
|
|
|
|
|
|
sub by_entry {
|
|
|
|
my ($aent, $aoff) = split('\|',$a);
|
|
|
|
@@ -13,6 +34,27 @@ sub by_entry {
|
|
|
|
$aent cmp $bent;
|
|
|
|
}
|
|
|
|
|
|
|
|
+#FIXME: someone may want "infile" or even parameter parsing
|
|
|
|
+sub get_outfile { # Return the command-line argument that follows the first "-o" in @ARGV, or "" if there is none.
|
|
|
|
+ my $next_is_file = 0; # set once "-o" has been seen; the following argument is the file name
|
|
|
|
+ foreach ( @ARGV ) {
|
|
|
|
+ if ( $next_is_file ) {
|
|
|
|
+ return $_
|
|
|
|
+ }
|
|
|
|
+ if ( $_ eq "-o" ) {
|
|
|
|
+ $next_is_file = 1;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return ""; # no "-o", or "-o" was the last argument
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+sub usage { # Print a short usage message and terminate the script; never returns.
|
|
|
|
+ print "usage:\n";
|
|
|
|
+ print "$0 -o outfile < input\n";
|
|
|
|
+
|
|
|
|
+ exit 99; # exit status 99 is what the script reports when usage() is called
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
# main routine
|
|
|
|
my $ne = 0; # number of entries in index
|
|
|
|
my @tindex=(); # the index itself
|
|
|
|
@@ -24,6 +66,10 @@ my $nm=0; # number of meaning fo
|
|
|
|
my $meaning=""; # current meaning and synonyms
|
|
|
|
my $p; # misc uses
|
|
|
|
my $encoding; # encoding used by text file
|
|
|
|
+my $outfile = "";
|
|
|
|
+
|
|
|
|
+$outfile = get_outfile();
|
|
|
|
+usage() if ( $outfile eq "" );
|
|
|
|
|
|
|
|
# top line of thesaurus provides encoding
|
|
|
|
$encoding=<STDIN>;
|
|
|
|
@@ -51,9 +97,13 @@ while ($rec=<STDIN>){
|
|
|
|
# now we have all of the information
|
|
|
|
# so sort it and then output the encoding, count and index data
|
|
|
|
@tindex = sort by_entry @tindex;
|
|
|
|
-print STDOUT "$encoding\n";
|
|
|
|
-print STDOUT "$ne\n";
|
|
|
|
+
|
|
|
|
+print "$outfile\n";
|
|
|
|
+open my $out_fh, '>', $outfile or die "ERROR: Can't open $outfile for writing: $!"; # 3-arg open: the name from @ARGV is never parsed as a mode
|
|
|
|
+print $out_fh "$encoding\n"; # first line of the index: encoding used by the text file
|
|
|
|
+print $out_fh "$ne\n"; # second line: number of entries in the index
|
|
|
|
foreach $one (@tindex) {
|
|
|
|
- print STDOUT "$one\n";
|
|
|
|
+ print $out_fh "$one\n"; # one sorted index entry per line
|
|
|
|
}
|
|
|
|
+close $out_fh or die "ERROR: Can't close $outfile: $!"; # buffered write errors only surface at close
|
|
|
|
|