1 files changed, 84 insertions, 0 deletions
diff --git a/libtextcat/data/new_fingerprints/fpdb.conf b/libtextcat/data/new_fingerprints/fpdb.conf
new file mode 100644
index 000000000000..5b54fef1d7ad
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/fpdb.conf
@@ -0,0 +1,84 @@
+#
+# A sample config file for the language models
+# provided with Gertjan van Noord's language guesser
+# (http://odur.let.rug.nl/~vannoord/TextCat/)
+#
+# Notes:
+# - You may consider eliminating a couple of small languages from this
+# list because they cause false positives with big languages and are
+# bad for performance. (Do you really want to recognize Drents?)
+# - Putting the most probable languages at the top of the list
+# improves performance, because this will raise the threshold for
+# likely candidates more quickly.
+#
+
+# This file has been modified for OOo (by Jocelyn MERAND, joc.mer@gmail.com) to include country and encoding.
+# Guess strings are formed as follows: language-country-encoding (the country part may be empty, e.g. af--utf8).
+
+afrikaans.lm                         af--utf8
+albanian.lm                          sq--utf8
+amharic_utf.lm                       am--utf8
+arabic.lm                            ar--utf8
+basque.lm                            eu--utf8
+belarus.lm                           be--utf8
+bosnian.lm                           bs--utf8
+breton.lm                            br--utf8
+catalan.lm                           ca--utf8
+chinese_simplified.lm                zh-CN-utf8
+chinese_traditional.lm               zh-TW-utf8
+croatian.lm                          hr--utf8
+czech.lm                             cs--utf8
+danish.lm                            da--utf8
+dutch.lm                             nl--utf8
+english.lm                           en--utf8
+esperanto.lm                         eo--utf8
+estonian.lm                          et--utf8
+finnish.lm                           fi--utf8
+french.lm                            fr--utf8
+frisian.lm                           fy--utf8
+georgian.lm                          ka--utf8
+german.lm                            de--utf8
+greek.lm                             el--utf8
+hebrew.lm                            he--utf8
+hindi.lm                             hi--utf8
+hungarian.lm                         hu--utf8
+icelandic.lm                         is--utf8
+indonesian.lm                        id--utf8
+irish_gaelic.lm                      ga--utf8
+italian.lm                           it--utf8
+japanese.lm                          ja--utf8
+korean.lm                            ko--utf8
+latin.lm                             la--utf8
+latvian.lm                           lv--utf8
+lithuanian.lm                        lt--utf8
+luxembourgish.lm                     lb--utf8
+malay.lm                             ms--utf8
+manx_gaelic.lm                       gv--utf8
+marathi.lm                           mr--utf8
+mongolian_cyrillic.lm                mn--utf8
+nepali.lm                            ne--utf8
+norwegian.lm                         nb--utf8       # Norwegian (Bokmal)
+persian.lm                           fa--utf8       # Farsi
+polish.lm                            pl--utf8
+portuguese.lm                        pt-PT-utf8
+quechua.lm                           qu--utf8
+romanian.lm                          ro--utf8
+romansh.lm                           rm--utf8
+russian.lm                           ru--utf8
+sanskrit.lm                          sa--utf8
+scots.lm                             sco--utf8
+scots_gaelic.lm                      gd--utf8
+serbian_ascii.lm                     sh-YU-utf8
+slovak_ascii.lm                      sk-SK-utf8
+slovenian.lm                         sl--utf8
+spanish.lm                           es--utf8
+swahili.lm                           sw--utf8
+swedish.lm                           sv--utf8
+tagalog.lm                           tl--utf8
+tamil.lm                             ta--utf8
+thai.lm                              th--utf8
+turkish.lm                           tr--utf8
+ukrainian.lm                         uk--utf8
+vietnamese.lm                        vi--utf8
+welsh.lm                             cy--utf8
+yiddish_utf.lm                       yi--utf8