office-gobmx/libtextcat/data/new_fingerprints/fpdb.conf
2007-01-12 11:40:54 +00:00

82 lines
3.7 KiB
Text

#
# A sample config file for the language models
# provided with Gertjan van Noord's language guesser
# (http://odur.let.rug.nl/~vannoord/TextCat/)
#
# Notes:
# - You may consider eliminating a couple of small languages from this
# list because they cause false positives with big languages and are
# bad for performance. (Do you really want to recognize Drents?)
# - Putting the most probable languages at the top of the list
# improves performance, because this will raise the threshold for
# likely candidates more quickly.
#
# This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
# Guess strings are formed as follows: language-country-encoding
# (fields may be left empty, e.g. af---utf8 or zh-CN--utf8).
afrikaans.lm af---utf8
albanian.lm sq---utf8
amharic_utf.lm am---utf8
arabic.lm ar---utf8
basque.lm eu---utf8
belarus.lm be---utf8
bosnian.lm bs---utf8
breton.lm br---utf8
catalan.lm ca---utf8
chinese_simplified.lm zh-CN--utf8
chinese_traditional.lm zh-TW--utf8
croatian.lm hr---utf8
czech.lm cs---utf8
danish.lm da---utf8
dutch.lm nl---utf8
english.lm en---utf8
esperanto.lm eo---utf8
estonian.lm et---utf8
finnish.lm fi---utf8
french.lm fr---utf8
frisian.lm fy---utf8
georgian.lm ka---utf8
german.lm de---utf8
greek.lm el---utf8
hebrew.lm he---utf8
hindi.lm hi---utf8
hungarian.lm hu---utf8
icelandic.lm is---utf8
indonesian.lm id---utf8
irish_gaelic.lm ga---utf8
italian.lm it---utf8
japanese.lm ja---utf8
korean.lm ko---utf8
latin.lm la---utf8
latvian.lm lv---utf8
lithuanian.lm lt---utf8
malay.lm ms---utf8
manx_gaelic.lm gv---utf8
marathi.lm mr---utf8
nepali.lm ne---utf8
norwegian.lm nb---utf8 # Norwegian (Bokmal)
persian.lm fa---utf8 # Farsi
polish.lm pl---utf8
portuguese.lm pt-PT--utf8
quechua.lm qu---utf8
romanian.lm ro---utf8
romansh.lm rm---utf8
russian.lm ru---utf8
sanskrit.lm sa---utf8
scots.lm sco---utf8
scots_gaelic.lm gd---utf8
serbian_ascii.lm sh-YU--utf8
slovak_ascii.lm sk-SK--utf8
slovenian.lm sl---utf8
spanish.lm es---utf8
swahili.lm sw---utf8
swedish.lm sv---utf8
tagalog.lm tl---utf8
tamil.lm ta---utf8
thai.lm th---utf8
turkish.lm tr---utf8
ukrainian.lm uk---utf8
vietnamese.lm vi---utf8
welsh.lm cy---utf8
yiddish_utf.lm yi---utf8