mythes12: move to proper external mythes module

2010-04-27 17:21:14 +02:00 · 2010-04-27 17:21:14 +02:00 · af15b9a545
commit af15b9a545
parent 14a6186576
10 changed files with 2 additions and 937 deletions
--- a/lingucomponent/prj/build.lst
+++ b/lingucomponent/prj/build.lst
@ -1,9 +1,8 @@
-lc  lingucomponent  :      linguistic libtextcat svl HYPHEN:hyphen HUNSPELL:hunspell NULL
+lc  lingucomponent  :      linguistic libtextcat svl HYPHEN:hyphen HUNSPELL:hunspell MYTHES:mythes NULL
 lc	lingucomponent								usr1	-	all	lc_mkout NULL
 lc  lingucomponent\inc                                      nmake   -   all lc_inc NULL
 lc  lingucomponent\source\lingutil                          nmake   -   all lc_util lc_inc NULL
-lc	lingucomponent\source\thesaurus\mythes					nmake	-	all	lc_mythes lc_util lc_inc NULL
+lc	lingucomponent\source\thesaurus\libnth					nmake	-	all	lc_libnth lc_util lc_inc NULL
 lc	lingucomponent\source\thesaurus\libnth					nmake	-	all	lc_libnth lc_mythes lc_util lc_inc NULL
 lc  lingucomponent\source\spellcheck\spell                  nmake   -   all lc_libspell lc_util lc_inc NULL
 lc  lingucomponent\source\hyphenator\altlinuxhyph\hyphen    nmake   -   all lc_libhyphen lc_util lc_inc NULL
 lc  lingucomponent\source\languageguessing                  nmake   -   all lc_languageguessing lc_util lc_inc NULL
--- a/lingucomponent/source/thesaurus/mythes/Makefile
+++ b/lingucomponent/source/thesaurus/mythes/Makefile
@ -1,39 +0,0 @@
 # Builds the static MyThes library (libmythes.a) and the small "example"
 # lookup program that links against it.
 CXX=g++
 CXXFLAGS= -O2 -Wall -ansi -pedantic -I.
 LDFLAGS=-L. -lmythes
 LIBS=libmythes.a
 AR=ar rc
 RANLIB=ranlib
 OBJS = mythes.o 
 # these targets are commands, not files
 .PHONY: all clean distclean depend
 all: example
 libmythes.a: $(OBJS)
 	$(AR) $@ $(OBJS)
 	-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
 example: example.o $(LIBS)
 	$(CXX) $(CXXFLAGS) -o $@ example.o $(LDFLAGS)
 %.o: %.cxx 
 	$(CXX) $(CXXFLAGS) -c $<
 # remove the archive by the name it is built under ($(LIBS)), so a stale
 # library never survives a clean
 clean:
 	rm -f *.o *~ example $(LIBS)
 distclean:	clean
 depend:
 	makedepend -- $(CXXFLAGS) -- *.[ch]xx
 # DO NOT DELETE THIS LINE -- make depend depends on it.
 mythes.o: mythes.hxx
 example.o: mythes.hxx 
--- a/lingucomponent/source/thesaurus/mythes/README
+++ b/lingucomponent/source/thesaurus/mythes/README
@ -1,60 +0,0 @@
 MyThes is a simple thesaurus that uses a structured
 text data file and an index file with binary search
 to lookup words and phrases and return information
 on part of speech, meanings, and synonyms
 MyThes was written to provide a thesaurus for the 
 OpenOffice.org project 
 The Main features of MyThes are:
 1. written in C++ to make it easier to interface with 
   Pspell, OpenOffice, AbiWord, etc
 2. it is stateless, uses no static variables and
   should be completely reentrant with no ifdefs  
 3. it compiles with -ansi and -pedantic and -Wall
   with no warnings so it should be quite portable
 4. it uses a perl program to read the structured
   text file and create the index needed for binary
   searching (see dictionaries/en_US/th_gen_idx.pl)
 5. it is very simple with *lots* of comments.
   The main "smarts" are in the structure of the
   text file that makes up the thesaurus data
 6. It comes with a ready-to-go structured thesaurus
   data file for en_US extracted from the WordNet-2.0 data.
   (see dictionaries/en_US/th_en_US_new.dat)
   Please see WordNet_license.txt and WordNet_readme.txt
   for more information on the very useful project!
   (found in dictionaries/en_US/)
 7. The source code has a BSD license (and  no advertising clause)
 MyThes has the world's simplest Makefile and no 
 configure support. It does come with a simple example 
 program that looks up some words and returns meanings 
 and synonyms.
 To build it simply do the following:
 unzip mythes.zip
 cd mythes
 make
 To run the example program:
 ./example th_en_US_new.idx th_en_US_new.dat checkme.lst
 Please play around with it and let me know
 what you think.
 Thanks,
 Kevin Hendricks
 kevin.hendricks@sympatico.ca
--- a/lingucomponent/source/thesaurus/mythes/checkme.lst
+++ b/lingucomponent/source/thesaurus/mythes/checkme.lst
@ -1,4 +0,0 @@
 simple
 complex
 junk
 jhjhjh
--- a/lingucomponent/source/thesaurus/mythes/data_layout.txt
+++ b/lingucomponent/source/thesaurus/mythes/data_layout.txt
@ -1,131 +0,0 @@
 Description of the Structure of the Data needed by MyThes
 --------------------------------------------------------
 MyThes is very simple.  Almost all of the "smarts" are really
 in the thesaurus data file itself.
 The format for this file is as follows:
 - no binary data 
 - line ending is a newline '\n' and not carriage return/linefeeds
 - Line 1 is a character string that describes the encoding
 used for the file.  It is up to the calling program to convert
 to and from this encoding if necessary.
     ISO8859-1 is used by the th_en_US_new.dat file.
     Strings currently recognized by OpenOffice.org are:
     UTF-8
     ISO8859-1
     ISO8859-2
     ISO8859-3
     ISO8859-4
     ISO8859-5
     ISO8859-6
     ISO8859-7
     ISO8859-8
     ISO8859-9
     ISO8859-10
     KOI8-R
     CP-1251
     ISO8859-14
     ISCII-DEVANAGARI
 - All of the remaining lines of the file follow this structure
 entry|num_mean
 pos|syn1_mean|syn2|...
 .
 .
 .
 pos|syn1_mean|syn2|...
 where:
   entry      - all lowercase version of the word or phrase being described
   num_mean   - number of meanings for this entry
   There is one meaning per line and each meaning is comprised of
   pos        -  part of speech or other meaning specific description
   syn1_mean  -  synonym 1 also used to describe the meaning itself 
   syn2       - synonym 2 for that meaning etc.
 To make this even clearer, here is actual data for the
 entry "simple".
 simple|9
 (adj)|simple |elemental|ultimate|oversimplified|simplistic|simplex|simplified|unanalyzable|
 undecomposable|uncomplicated|unsophisticated|easy|plain|unsubdivided
 (adj)|elementary|uncomplicated|unproblematic|easy
 (adj)|bare|mere|plain
 (adj)|childlike|wide-eyed|dewy-eyed|naive |naif
 (adj)|dim-witted|half-witted|simple-minded|retarded
 (adj)|simple |unsubdivided|unlobed|smooth
 (adj)|plain
 (noun)|herb|herbaceous plant
 (noun)|simpleton|person|individual|someone|somebody|mortal|human|soul
 It says that "simple" has 9 different meanings and each 
 meaning will have its part of speech and at least 1 synonym,
 with any others following on the same line.
 Once you have created your own structured text file you can use
 the perl program "th_gen_idx.pl" which can be found in this
 directory to create an index file that is used to seek into
 your data file by the MyThes code.
 The correct way to run the perl program is as follows:
 cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
 Then if you head the resulting index file you should see the 
 following:
 ISO8859-1
 142689
 'hood|10
 's gravenhage|88
 'tween|173
 'tween decks|196
 .22|231
 .22 caliber|319
 .22 calibre|365
 .38 caliber|411
 .38 calibre|457
 .45 caliber|503
 .45 calibre|549
 0|595
 1|666
 1 chronicles|6283
 1 esdras|6336
 Line 1 is the same encoding string taken from the 
 structured thesaurus data file.
 Line 2 is a count of the total number of entries
 in your thesaurus.
 All of the remaining lines are of the form
 entry|byte_offset_into_data_file_where_entry_is_found
 That's all there is to it.
 Kevin
 kevin.hendricks@sympatico.ca
--- a/lingucomponent/source/thesaurus/mythes/example.cxx
+++ b/lingucomponent/source/thesaurus/mythes/example.cxx
@ -1,128 +0,0 @@
 /*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_lingucomponent.hxx"
 #include <cstring>
 #include <cstdlib>
 #include <cstdio>
 #include "mythes.hxx"
 extern char * mystrdup(const char * s);
 using namespace std;
 /*
  * Example driver: looks up every word listed in a file and prints the
  * meanings and synonyms the thesaurus holds for it.
  *
  *   argv[1] - index file
  *   argv[2] - thesaurus data file
  *   argv[3] - file of words to check, one per line
  *
  * Returns 0 when the word list has been processed; exits with status 1
  * on a usage error or if the word list cannot be opened.
  */
 int
 main(int argc, char** argv)
 {
   /* first parse the command line options - all three are required */
   if (argc < 4) {
     fprintf(stderr,"correct syntax is:\n");
     fprintf(stderr,"example index_file thesaurus_file file_of_words_to_check\n");
     exit(1);
   }
   /* argv outlives every use below, so the paths are used in place
      instead of being duplicated (and freed again) */
   const char * af = argv[1];
   const char * df = argv[2];
   const char * wtc = argv[3];
   /* open the words to check list */
   FILE* wtclst = fopen(wtc,"r");
   if (!wtclst) {
     fprintf(stderr,"Error - could not open file of words to check\n");
     exit(1);
   }
   // open a new thesaurus object
   MyThes * pMT= new MyThes(af,df);
   // get the encoding used for the thesaurus data; it is NULL when the
   // thesaurus files could not be loaded, so never hand it to %s directly
   char * encoding = pMT->get_th_encoding();
   fprintf(stdout,"Thesaurus uses encoding %s\n\n",encoding ? encoding : "(null)");
   char buf[101];
   mentry * pmean;
   while(fgets(buf,sizeof(buf),wtclst)) {
     // strip the line terminator only when one is present, so the last
     // word of a file without a trailing newline keeps its final letter
     int len = (int)strlen(buf);
     while (len > 0 && (buf[len-1] == '\n' || buf[len-1] == '\r')) {
       buf[--len] = '\0';
     }
     int count = pMT->Lookup(buf,len,&pmean);
     // don't change value of pmean
     // or count since needed for CleanUpAfterLookup routine
     mentry* pm = pmean;
     if (count) {
       fprintf(stdout,"%s has %d meanings\n",buf,count);
       for (int  i=0; i < count; i++) {
         fprintf(stdout,"   meaning %d: %s\n",i,pm->defn);
         for (int j=0; j < pm->count; j++) {
           fprintf(stdout,"       %s\n",pm->psyns[j]);
         }
         fprintf(stdout,"\n");
         pm++;
       }
       fprintf(stdout,"\n\n");
       // now clean up all allocated memory
       pMT->CleanUpAfterLookup(&pmean,count);
     } else {
       fprintf(stdout,"\"%s\" is not in thesaurus!\n",buf);
     }
   }
   delete pMT;
   fclose(wtclst);
   return 0;
 }
--- a/lingucomponent/source/thesaurus/mythes/license.readme
+++ b/lingucomponent/source/thesaurus/mythes/license.readme
@ -1,34 +0,0 @@
 /*
 * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
 * And Contributors.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. All modifications to the source code must be clearly marked as
 *    such.  Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
--- a/lingucomponent/source/thesaurus/mythes/makefile.mk
+++ b/lingucomponent/source/thesaurus/mythes/makefile.mk
@ -1,59 +0,0 @@
 #*************************************************************************
 #
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 # 
 # Copyright 2000, 2010 Oracle and/or its affiliates.
 #
 # OpenOffice.org - a multi-platform office productivity suite
 #
 # This file is part of OpenOffice.org.
 #
 # OpenOffice.org is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License version 3
 # only, as published by the Free Software Foundation.
 #
 # OpenOffice.org is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License version 3 for more details
 # (a copy is included in the LICENSE file that accompanied this code).
 #
 # You should have received a copy of the GNU Lesser General Public License
 # version 3 along with OpenOffice.org.  If not, see
 # <http://www.openoffice.org/license.html>
 # for a copy of the LGPLv3 License.
 #
 #*************************************************************************
 PRJ = ..$/..$/..
 PRJNAME	= lingucomponent
 TARGET	= mythes
 LIBTARGET=NO
 #----- Settings ---------------------------------------------------------
 .INCLUDE : settings.mk
 # --- Files --------------------------------------------------------
 .IF "$(SYSTEM_MYTHES)" == "YES"
@all:
    @echo "Using system mythes..."
 .ENDIF
 all_target: ALLTAR
 SLOFILES=	\
        $(SLO)$/mythes.obj
 LIB1TARGET= $(SLB)$/lib$(TARGET).lib
 LIB1ARCHIV= $(LB)/lib$(TARGET).a
 LIB1OBJFILES= $(SLOFILES)
 # --- Targets ------------------------------------------------------
 .INCLUDE : target.mk
--- a/lingucomponent/source/thesaurus/mythes/mythes.cxx
+++ b/lingucomponent/source/thesaurus/mythes/mythes.cxx
@ -1,403 +0,0 @@
 /*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
 // MARKER(update_precomp.py): autogen include statement, do not remove
 #include "precompiled_lingucomponent.hxx"
 #include "license.readme"
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <errno.h>
 #include "mythes.hxx"
 // Construct a thesaurus from an index file and a data file.
 //
 // idxpath - path of the index file
 // datpath - path of the thesaurus data file
 //
 // If loading fails an error is written to stderr and the object is left
 // empty: Lookup() then returns 0 for every word.
 MyThes::MyThes(const char* idxpath, const char * datpath)
 {
    nw = 0;
    encoding = NULL;
    list = NULL;
    offst = NULL;
    // must be cleared before thInitialize() runs: thCleanup() tests pdfile,
    // and thInitialize() can fail before it ever assigns the member
    pdfile = NULL;
    if (thInitialize(idxpath, datpath) != 1) {
        fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
        fflush(stderr);
        thCleanup();
        // did not initialize properly - throw exception?
    }
 }
 // Destructor: hands everything the object owns to thCleanup().
 MyThes::~MyThes()
 {
    this->thCleanup();
 }
 // Load the index file into memory and open the data file.
 //
 // idxpath - path of the index file: line 1 is the encoding name, line 2
 //           the number of entries, every further line "entry|offset"
 // datpath - path of the thesaurus data file; it is opened and kept in
 //           pdfile for later lookups
 //
 // returns: 1 on success, 0 if a file cannot be opened or memory runs out.
 //          Members already filled in on a failure are left for
 //          thCleanup() to release.
 int MyThes::thInitialize(const char* idxpath, const char* datpath)
 {
    // open the index file
    FILE * pifile = fopen(idxpath,"r");
    if (!pifile) {
        return 0;
    }
    // scratch buffer for one index line; zero-filled, so a failed first
    // read leaves an empty encoding string
    char * wrd;
    wrd = (char *)calloc(1, MAX_WD_LEN);
    if (!wrd) {
       fprintf(stderr,"Error - bad memory allocation\n");
       fflush(stderr);
       fclose(pifile);
       return 0;
    }
    // parse in encoding and index size
    int len = readLine(pifile,wrd,MAX_WD_LEN);
    encoding = mystrdup(wrd);
    len = readLine(pifile,wrd,MAX_WD_LEN);
    int idxsz = atoi(wrd);
    // now allocate list, offst for the given size
    list = (char**)   calloc(idxsz,sizeof(char*));
    offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
    if ( (!(list)) || (!(offst)) ) {
       fprintf(stderr,"Error - bad memory allocation\n");
       fflush(stderr);
       free((void *)wrd);      // the scratch buffer is ours alone to release
       fclose(pifile);
       return 0;
    }
    // now parse the remaining lines of the index
    len = readLine(pifile,wrd,MAX_WD_LEN);
    while (len > 0)
    {
        int np = mystr_indexOfChar(wrd,'|');
        // lines beyond the announced entry count, or without a '|', are skipped
        if (nw < idxsz) {
            if (np >= 0) {
                *(wrd+np) = '\0';
                list[nw] = (char *)calloc(1,(np+1));
                if (!list[nw]) {
                    fprintf(stderr,"Error - bad memory allocation\n");
                    fflush(stderr);
                    free((void *)wrd);
                    fclose(pifile);
                    return 0;
                }
                memcpy((list[nw]),wrd,np);
                offst[nw] = atoi(wrd+np+1);
                nw++;
            }
        }
        len = readLine(pifile,wrd,MAX_WD_LEN);
    }
    free((void *)wrd);
    fclose(pifile);
    /* next open the data file */
    pdfile = fopen(datpath,"r");
    if (!pdfile) {
        return 0;
    }
    return 1;
 }
 // Release everything the object holds - the open data file, the indexed
 // words, the offset table and the encoding name - and reset the members
 // so the object reads as empty.
 void MyThes::thCleanup()
 {
    // the data file goes first
    if (pdfile != NULL) {
        fclose(pdfile);
        pdfile = NULL;
    }
    // then each indexed word, followed by the array that held them
    if (list != NULL) {
        int i = 0;
        while (i < nw) {
            free(list[i]);      // free(NULL) is a no-op
            list[i] = NULL;
            ++i;
        }
        free((void*)list);
    }
    free((void*)encoding);
    free((void*)offst);
    nw = 0;
    offst = NULL;
    list = NULL;
    encoding = NULL;
 }
 // lookup text in index and count of meanings and a list of meaning entries
 // with each entry having a synonym count and pointer to an
 // array of char * (i.e the synonyms)
 //
 // note: calling routine should call CleanUpAfterLookup with the original
 // meaning point and count to properly deallocate memory
 int MyThes::Lookup(const char * pText, int len, mentry** pme)
 {
    *pme = NULL;
    // handle the case of missing file or file related errors
    if (! pdfile) return 0;
    long offset = 0;
    /* copy search word and make sure null terminated */
    char * wrd = (char *) calloc(1,(len+1));
    memcpy(wrd,pText,len);
    /* find it in the list */
    int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
    free(wrd);
    if (idx < 0) return 0;
    // now seek to the offset
    offset = (long) offst[idx];
    int rc = fseek(pdfile,offset,SEEK_SET);
    if (rc) {
       return 0;
    }
    // grab the count of the number of meanings
    // and allocate a list of meaning entries
    char * buf = NULL;
    buf  = (char *) malloc( MAX_LN_LEN );
    if (!buf) return 0;
    readLine(pdfile, buf, (MAX_LN_LEN-1));
    int np = mystr_indexOfChar(buf,'|');
    if (np < 0) {
         free(buf);
         return 0;
    }
    int nmeanings = atoi(buf+np+1);
    *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
    if (!(*pme)) {
        free(buf);
        return 0;
    }
    // now read in each meaning and parse it to get defn, count and synonym lists
    mentry* pm = *(pme);
    char dfn[MAX_WD_LEN];
    for (int j = 0; j < nmeanings; j++) {
        readLine(pdfile, buf, (MAX_LN_LEN-1));
        pm->count = 0;
        pm->psyns = NULL;
        pm->defn = NULL;
        // store away the part of speech for later use
        char * p = buf;
        char * pos = NULL;
        np = mystr_indexOfChar(p,'|');
        if (np >= 0) {
           *(buf+np) = '\0';
       pos = mystrdup(p);
           p = p + np + 1;
    } else {
          pos = mystrdup("");
        }
        // count the number of fields in the remaining line
        int nf = 1;
        char * d = p;
        np = mystr_indexOfChar(d,'|');
        while ( np >= 0 ) {
      nf++;
          d = d + np + 1;
          np = mystr_indexOfChar(d,'|');
    }
    pm->count = nf;
        pm->psyns = (char **) malloc(nf*sizeof(char*));
        // fill in the synonym list
        d = p;
        for (int jj = 0; jj < nf; jj++)
        {
            np = mystr_indexOfChar(d,'|');
            if (np > 0)
            {
                *(d+np) = '\0';
                pm->psyns[jj] = mystrdup(d);
                d = d + np + 1;
            }
            else
            {
              pm->psyns[jj] = mystrdup(d);
            }
        }
        // add pos to first synonym to create the definition
        int k = strlen(pos);
        int m = strlen(pm->psyns[0]);
        if ((k+m) < (MAX_WD_LEN - 1)) {
             strncpy(dfn,pos,k);
             *(dfn+k) = ' ';
             strncpy((dfn+k+1),(pm->psyns[0]),m+1);
             pm->defn = mystrdup(dfn);
    } else {
         pm->defn = mystrdup(pm->psyns[0]);
    }
        free(pos);
        pm++;
    }
    free(buf);
    return nmeanings;
 }
 // Free a meaning array handed out by Lookup().
 //
 // pme       - address of the array pointer; reset to NULL once freed
 // nmeanings - number of entries in the array
 //
 // Nothing is done when nmeanings is 0 or *pme is already NULL.
 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
 {
    if (nmeanings == 0 || *pme == NULL) return;
    mentry * entries = *pme;
    for (int m = 0; m < nmeanings; ++m) {
        mentry & cur = entries[m];
        // each synonym string first, then the array that held them
        const int nsyn = cur.count;
        for (int s = 0; s < nsyn; ++s) {
            free(cur.psyns[s]);     // free(NULL) is a no-op
            cur.psyns[s] = NULL;
        }
        free(cur.psyns);
        cur.psyns = NULL;
        free(cur.defn);
        cur.defn = NULL;
        cur.count = 0;
    }
    free(entries);
    *pme = NULL;
 }
 // Read one line from pf into buf and drop its line terminator.
 //
 // pf  - stream to read from
 // buf - destination buffer
 // nc  - size handed to fgets, which stores at most nc-1 characters
 //
 // returns: length of the string left in buf, or -1 when fgets reads
 //          nothing (end of file or error)
 int MyThes::readLine(FILE * pf, char * buf, int nc)
 {
   if (fgets(buf,nc,pf) == NULL) {
     return -1;
   }
   mychomp(buf);
   return strlen(buf);
 }
 //  Binary search for a null terminated string in an array of null
 //  terminated strings ordered as strcmp() orders them.
 //
 //  sw    - string to search for
 //  _list - array to search
 //  nlst  - number of entries in _list
 //
 //  returns: -1 on not found (always for an empty list)
 //           index of sw in _list[]
 int MyThes::binsearch(char * sw, char* _list[], int nlst)
 {
    // an empty list has no first or last entry to compare against
    if (nlst <= 0) return -1;
    int lp = 0;
    int up = nlst-1;
    // quick rejection: outside the range covered by the list
    if (strcmp(sw,_list[lp]) < 0) return -1;
    if (strcmp(sw,_list[up]) > 0) return -1;
    while (lp <= up) {
        // midpoint computed without forming lp+up, which could overflow
        int mp = lp + ((up - lp) >> 1);
        int j = strcmp(sw,_list[mp]);
        if ( j > 0) {
            lp = mp + 1;
        } else if (j < 0 ) {
            up = mp - 1;
        } else {
            return mp;
        }
    }
    return -1;
 }
 // Return the encoding name read from the index file, or NULL when none
 // is loaded. The string stays owned by the object (thCleanup frees it).
 char * MyThes::get_th_encoding()
 {
   return encoding;
 }
 // Duplicate a null terminated string into malloc'ed memory.
 //
 // p - string to copy
 //
 // returns: the copy, which the caller releases with free(), or NULL if
 //          the allocation fails
 char * MyThes::mystrdup(const char * p)
 {
   int nbytes = strlen(p) + 1;          // includes the terminator
   char * copy = (char *)malloc(nbytes);
   if (copy == NULL) {
     return NULL;
   }
   memcpy(copy,p,nbytes);
   return copy;
 }
 // Remove one trailing line terminator - "\n", "\r\n" or "\r" - from s,
 // in place. Text that does not end in a terminator is left untouched,
 // even when a '\r' occurs just before its last character.
 void MyThes::mychomp(char * s)
 {
   int k = strlen(s);
   if ((k > 0) && (*(s+k-1) == '\n')) {
     *(s+k-1) = '\0';
     k--;
     // the '\r' of a "\r\n" pair goes only together with its '\n'
     if ((k > 0) && (*(s+k-1) == '\r')) *(s+k-1) = '\0';
   } else if ((k > 0) && (*(s+k-1) == '\r')) {
     *(s+k-1) = '\0';
   }
 }
 // Find the first occurrence of character c in string d.
 //
 // returns: its zero-based index, or -1 if c does not occur
 int MyThes::mystr_indexOfChar(const char * d, int c)
 {
   const char * hit = strchr(d,c);
   return hit ? (int)(hit - d) : -1;
 }
--- a/lingucomponent/source/thesaurus/mythes/mythes.hxx
+++ b/lingucomponent/source/thesaurus/mythes/mythes.hxx
@ -1,76 +0,0 @@
 #ifndef _MYTHES_HXX_
 #define _MYTHES_HXX_
 // some maximum sizes for buffers
 #define MAX_WD_LEN 200
 #define MAX_LN_LEN 16384
 // a meaning with definition, count of synonyms and synonym list;
 // MyThes::Lookup allocates these and MyThes::CleanUpAfterLookup frees them
 struct mentry {
  char*  defn;    // definition text built by Lookup from the part of speech and first synonym
  int  count;     // number of entries in psyns
  char** psyns;   // array of count synonym strings
 };
 // Thesaurus reader: keeps the index (words and their data file offsets)
 // in memory and reads the meanings of a word from the data file on demand.
 // NOTE(review): FILE is used below but no header is included here - the
 // including file presumably has to pull in <stdio.h> first; confirm.
 class MyThes
 {
       int  nw;                  /* number of entries in thesaurus */
       char**  list;               /* stores word list */
       unsigned int* offst;              /* stores offset list */
       char *  encoding;           /* stores text encoding; */
        FILE  *pdfile;             /* data file opened by thInitialize; NULL after thCleanup */
    // disallow copy-constructor and assignment-operator for now
    MyThes();
    MyThes(const MyThes &);
    MyThes & operator = (const MyThes &);
 public:
    // load the index file idxpath and open the data file datpath
    MyThes(const char* idxpath, const char* datpath);
    ~MyThes();
        // lookup text in index and return number of meanings
    // each meaning entry has a definition, synonym count and pointer
        // when complete return the *original* meaning entry and count via
        // CleanUpAfterLookup to properly handle memory deallocation
        int Lookup(const char * pText, int len, mentry** pme);
        // free the meaning array returned through pme by Lookup
        void CleanUpAfterLookup(mentry** pme, int nmean);
        // encoding name read from the index file, or NULL if none is loaded
        char* get_th_encoding();
 private:
        // Open index and dat files and load list array
        int thInitialize (const char* indxpath, const char* datpath);
        // internal close and cleanup dat and idx files
        void thCleanup ();
        // read a text line (\n terminated) stripping off line terminator
        int readLine(FILE * pf, char * buf, int nc);
        // binary search on null terminated character strings
        int binsearch(char * wrd, char* list[], int nlst);
        // string duplication routine
        char * mystrdup(const char * p);
        // remove cross-platform text line end characters
        void mychomp(char * s);
        // return index of char in string
        int mystr_indexOfChar(const char * d, int c);
 };
 #endif