summaryrefslogtreecommitdiff
path: root/libtextcat/data/new_fingerprints/fpdb.conf
diff options
context:
space:
mode:
Diffstat (limited to 'libtextcat/data/new_fingerprints/fpdb.conf')
-rw-r--r--libtextcat/data/new_fingerprints/fpdb.conf84
1 files changed, 84 insertions, 0 deletions
diff --git a/libtextcat/data/new_fingerprints/fpdb.conf b/libtextcat/data/new_fingerprints/fpdb.conf
new file mode 100644
index 000000000000..5b54fef1d7ad
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/fpdb.conf
@@ -0,0 +1,84 @@
+#
+# A sample config file for the language models
+# provided with Gertjan van Noord's language guesser
+# (http://odur.let.rug.nl/~vannoord/TextCat/)
+#
+# Notes:
+# - You may consider eliminating a couple of small languages from this
+# list because they cause false positives with big languages and are
+# bad for performance. (Do you really want to recognize Drents?)
+# - Putting the most probable languages at the top of the list
+# improves performance, because this will raise the threshold for
+# likely candidates more quickly.
+#
+
+# This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
+# Guess strings are formed as follows: language-country-encoding (the country part may be empty, e.g. af--utf8)
+
+afrikaans.lm af--utf8
+albanian.lm sq--utf8
+amharic_utf.lm am--utf8
+arabic.lm ar--utf8
+basque.lm eu--utf8
+belarus.lm be--utf8
+bosnian.lm bs--utf8
+breton.lm br--utf8
+catalan.lm ca--utf8
+chinese_simplified.lm zh-CN-utf8
+chinese_traditional.lm zh-TW-utf8
+croatian.lm hr--utf8
+czech.lm cs--utf8
+danish.lm da--utf8
+dutch.lm nl--utf8
+english.lm en--utf8
+esperanto.lm eo--utf8
+estonian.lm et--utf8
+finnish.lm fi--utf8
+french.lm fr--utf8
+frisian.lm fy--utf8
+georgian.lm ka--utf8
+german.lm de--utf8
+greek.lm el--utf8
+hebrew.lm he--utf8
+hindi.lm hi--utf8
+hungarian.lm hu--utf8
+icelandic.lm is--utf8
+indonesian.lm id--utf8
+irish_gaelic.lm ga--utf8
+italian.lm it--utf8
+japanese.lm ja--utf8
+korean.lm ko--utf8
+latin.lm la--utf8
+latvian.lm lv--utf8
+lithuanian.lm lt--utf8
+luxembourgish.lm lb--utf8
+malay.lm ms--utf8
+manx_gaelic.lm gv--utf8
+marathi.lm mr--utf8
+mongolian_cyrillic.lm mn--utf8
+nepali.lm ne--utf8
+norwegian.lm nb--utf8 # Norwegian (Bokmal)
+persian.lm fa--utf8 # Farsi
+polish.lm pl--utf8
+portuguese.lm pt-PT-utf8
+quechua.lm qu--utf8
+romanian.lm ro--utf8
+romansh.lm rm--utf8
+russian.lm ru--utf8
+sanskrit.lm sa--utf8
+scots.lm sco--utf8
+scots_gaelic.lm gd--utf8
+serbian_ascii.lm sh-YU-utf8
+slovak_ascii.lm sk-SK-utf8
+slovenian.lm sl--utf8
+spanish.lm es--utf8
+swahili.lm sw--utf8
+swedish.lm sv--utf8
+tagalog.lm tl--utf8
+tamil.lm ta--utf8
+thai.lm th--utf8
+turkish.lm tr--utf8
+ukrainian.lm uk--utf8
+vietnamese.lm vi--utf8
+welsh.lm cy--utf8
+yiddish_utf.lm yi--utf8