2007-01-12 05:40:54 -06:00
|
|
|
#
|
|
|
|
# A sample config file for the language models
|
|
|
|
# provided with Gertjan van Noord's language guesser
|
|
|
|
# (http://odur.let.rug.nl/~vannoord/TextCat/)
|
|
|
|
#
|
|
|
|
# Notes:
|
|
|
|
# - You may consider eliminating a couple of small languages from this
|
|
|
|
# list because they cause false positives with big languages and are
|
|
|
|
# bad for performance. (Do you really want to recognize Drents?)
|
|
|
|
# - Putting the most probable languages at the top of the list
|
|
|
|
# improves performance, because this will raise the threshold for
|
|
|
|
# likely candidates more quickly.
|
|
|
|
#
|
|
|
|
|
|
|
|
# This file has been modified (for OOo, by Jocelyn MERAND, joc.mer@gmail.com) to include country and encoding.
|
|
|
|
# Guess strings are formed as follows: language-country-encoding (e.g. zh-CN-utf8; the country part may be empty, as in en--utf8)
|
|
|
|
|
2007-01-18 07:07:31 -06:00
|
|
|
afrikaans.lm af--utf8
|
|
|
|
albanian.lm sq--utf8
|
|
|
|
amharic_utf.lm am--utf8
|
|
|
|
arabic.lm ar--utf8
|
|
|
|
basque.lm eu--utf8
|
|
|
|
belarus.lm be--utf8
|
|
|
|
bosnian.lm bs--utf8
|
|
|
|
breton.lm br--utf8
|
|
|
|
catalan.lm ca--utf8
|
|
|
|
chinese_simplified.lm zh-CN-utf8
|
|
|
|
chinese_traditional.lm zh-TW-utf8
|
|
|
|
croatian.lm hr--utf8
|
|
|
|
czech.lm cs--utf8
|
|
|
|
danish.lm da--utf8
|
|
|
|
dutch.lm nl--utf8
|
|
|
|
english.lm en--utf8
|
|
|
|
esperanto.lm eo--utf8
|
|
|
|
estonian.lm et--utf8
|
|
|
|
finnish.lm fi--utf8
|
|
|
|
french.lm fr--utf8
|
|
|
|
frisian.lm fy--utf8
|
|
|
|
georgian.lm ka--utf8
|
|
|
|
german.lm de--utf8
|
|
|
|
greek.lm el--utf8
|
|
|
|
hebrew.lm he--utf8
|
|
|
|
hindi.lm hi--utf8
|
|
|
|
hungarian.lm hu--utf8
|
|
|
|
icelandic.lm is--utf8
|
|
|
|
indonesian.lm id--utf8
|
|
|
|
irish_gaelic.lm ga--utf8
|
|
|
|
italian.lm it--utf8
|
|
|
|
japanese.lm ja--utf8
|
|
|
|
korean.lm ko--utf8
|
|
|
|
latin.lm la--utf8
|
|
|
|
latvian.lm lv--utf8
|
|
|
|
lithuanian.lm lt--utf8
|
|
|
|
malay.lm ms--utf8
|
|
|
|
manx_gaelic.lm gv--utf8
|
|
|
|
marathi.lm mr--utf8
|
|
|
|
nepali.lm ne--utf8
|
|
|
|
norwegian.lm nb--utf8 # Norwegian (Bokmal)
|
|
|
|
persian.lm fa--utf8 # Farsi
|
|
|
|
polish.lm pl--utf8
|
|
|
|
portuguese.lm pt-PT-utf8
|
|
|
|
quechua.lm qu--utf8
|
|
|
|
romanian.lm ro--utf8
|
|
|
|
romansh.lm rm--utf8
|
|
|
|
russian.lm ru--utf8
|
|
|
|
sanskrit.lm sa--utf8
|
|
|
|
scots.lm sco--utf8
|
|
|
|
scots_gaelic.lm gd--utf8
|
|
|
|
serbian_ascii.lm sh-YU-utf8
|
|
|
|
slovak_ascii.lm sk-SK-utf8
|
|
|
|
slovenian.lm sl--utf8
|
|
|
|
spanish.lm es--utf8
|
|
|
|
swahili.lm sw--utf8
|
|
|
|
swedish.lm sv--utf8
|
|
|
|
tagalog.lm tl--utf8
|
|
|
|
tamil.lm ta--utf8
|
|
|
|
thai.lm th--utf8
|
|
|
|
turkish.lm tr--utf8
|
|
|
|
ukrainian.lm uk--utf8
|
|
|
|
vietnamese.lm vi--utf8
|
|
|
|
welsh.lm cy--utf8
|
|
|
|
yiddish_utf.lm yi--utf8
|