summaryrefslogtreecommitdiff
path: root/libtextcat
diff options
context:
space:
mode:
Diffstat (limited to 'libtextcat')
-rw-r--r--libtextcat/data/new_fingerprints/LICENSE30
-rw-r--r--libtextcat/data/new_fingerprints/fpdb.conf85
-rw-r--r--libtextcat/data/new_fingerprints/lm/afrikaans.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/albanian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/amharic_utf.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/arabic.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/armenian.lm0
-rw-r--r--libtextcat/data/new_fingerprints/lm/basque.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/belarus.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/bosnian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/breton.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/catalan.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/chinese_simplified.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/chinese_traditional.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/croatian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/czech.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/danish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/drents.lm0
-rw-r--r--libtextcat/data/new_fingerprints/lm/dutch.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/english.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/esperanto.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/estonian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/finnish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/french.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/frisian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/georgian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/german.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/greek.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/hebrew.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/hindi.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/hungarian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/icelandic.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/indonesian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/irish_gaelic.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/italian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/japanese.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/korean.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/latin.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/latvian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/lithuanian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/luxembourgish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/malay.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/manx_gaelic.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/marathi.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/middle_frisian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/mingo.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/mongolian_cyrillic.lm363
-rw-r--r--libtextcat/data/new_fingerprints/lm/nepali.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/norwegian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/persian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/polish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/portuguese.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/quechua.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/romanian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/romansh.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/russian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/sanskrit.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/scots.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/scots_gaelic.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/serbian_ascii.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/slovak_ascii.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/slovenian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/spanish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/swahili.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/swedish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/tagalog.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/tamil.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/thai.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/turkish.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/ukrainian.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/vietnamese.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/welsh.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/yiddish_utf.lm400
-rw-r--r--libtextcat/data/new_fingerprints/lm/zulu.lm400
-rw-r--r--libtextcat/libtextcat-2.2.patch4078
-rw-r--r--libtextcat/makefile.mk85
-rw-r--r--libtextcat/prj/build.lst3
-rw-r--r--libtextcat/prj/d.lst12
78 files changed, 32256 insertions, 0 deletions
diff --git a/libtextcat/data/new_fingerprints/LICENSE b/libtextcat/data/new_fingerprints/LICENSE
new file mode 100644
index 000000000000..6d883704c525
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2003, WiseGuys Internet B.V.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+- Neither the name of the WiseGuys Internet B.V. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/libtextcat/data/new_fingerprints/fpdb.conf b/libtextcat/data/new_fingerprints/fpdb.conf
new file mode 100644
index 000000000000..df56f9e270ef
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/fpdb.conf
@@ -0,0 +1,85 @@
+#
+# A sample config file for the language models
+# provided with Gertjan van Noord's language guesser
+# (http://odur.let.rug.nl/~vannoord/TextCat/)
+#
+# Notes:
+# - You may consider eliminating a couple of small languages from this
+# list because they cause false positives with big languages and are
+# bad for performance. (Do you really want to recognize Drents?)
+# - Putting the most probable languages at the top of the list
+# improves performance, because this will raise the threshold for
+# likely candidates more quickly.
+#
+
+# This file has been modified for OOo (by Jocelyn MERAND, joc.mer@gmail.com) to include country and encoding.
+# Guess strings are formed as follows: language-country-encoding
+
+afrikaans.lm af--utf8
+albanian.lm sq--utf8
+amharic_utf.lm am--utf8
+arabic.lm ar--utf8
+basque.lm eu--utf8
+belarus.lm be--utf8
+bosnian.lm bs--utf8
+breton.lm br--utf8
+catalan.lm ca--utf8
+chinese_simplified.lm zh-CN-utf8
+chinese_traditional.lm zh-TW-utf8
+croatian.lm hr--utf8
+czech.lm cs--utf8
+danish.lm da--utf8
+dutch.lm nl--utf8
+english.lm en--utf8
+esperanto.lm eo--utf8
+estonian.lm et--utf8
+finnish.lm fi--utf8
+french.lm fr--utf8
+frisian.lm fy--utf8
+georgian.lm ka--utf8
+german.lm de--utf8
+greek.lm el--utf8
+hebrew.lm he--utf8
+hindi.lm hi--utf8
+hungarian.lm hu--utf8
+icelandic.lm is--utf8
+indonesian.lm id--utf8
+irish_gaelic.lm ga--utf8
+italian.lm it--utf8
+japanese.lm ja--utf8
+korean.lm ko--utf8
+latin.lm la--utf8
+latvian.lm lv--utf8
+lithuanian.lm lt--utf8
+luxembourgish.lm lb--utf8
+malay.lm ms--utf8
+manx_gaelic.lm gv--utf8
+marathi.lm mr--utf8
+mongolian_cyrillic.lm mn--utf8
+nepali.lm ne--utf8
+norwegian.lm nb--utf8 # Norwegian (Bokmal)
+persian.lm fa--utf8 # Farsi
+polish.lm pl--utf8
+portuguese.lm pt-PT-utf8
+quechua.lm qu--utf8
+romanian.lm ro--utf8
+romansh.lm rm--utf8
+russian.lm ru--utf8
+sanskrit.lm sa--utf8
+scots.lm sco--utf8
+scots_gaelic.lm gd--utf8
+serbian_ascii.lm sh-YU-utf8
+slovak_ascii.lm sk-SK-utf8
+slovenian.lm sl--utf8
+spanish.lm es--utf8
+swahili.lm sw--utf8
+swedish.lm sv--utf8
+tagalog.lm tl--utf8
+tamil.lm ta--utf8
+thai.lm th--utf8
+turkish.lm tr--utf8
+ukrainian.lm uk--utf8
+vietnamese.lm vi--utf8
+welsh.lm cy--utf8
+yiddish_utf.lm yi--utf8
+zulu.lm zu--utf8
diff --git a/libtextcat/data/new_fingerprints/lm/afrikaans.lm b/libtextcat/data/new_fingerprints/lm/afrikaans.lm
new file mode 100644
index 000000000000..c110f154b664
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/afrikaans.lm
@@ -0,0 +1,400 @@
+_ 23602
+e 8036
+a 4087
+n 3782
+i 3726
+o 3314
+r 2951
+s 2885
+t 2749
+d 2479
+e_ 2118
+l 1854
+k 1741
+ie 1670
+g 1601
+n_ 1447
+m 1440
+_d 1219
+t_ 1143
+er 1124
+h 1124
+u 1110
+ie_ 1079
+y 1048
+w 986
+s_ 982
+_s 969
+_h 956
+di 924
+an 922
+r_ 912
+aa 882
+v 876
+en 807
+_di 807
+. 790
+y_ 747
+_v 709
+et 706
+._ 694
+die 691
+die_ 667
+_n 666
+_die 651
+p 639
+_m 634
+_die_ 633
+_w 632
+ee 607
+ge 606
+_o 598
+b 586
+te 568
+, 560
+in 555
+k_ 550
+_e 550
+,_ 548
+oo 516
+et_ 511
+de 509
+el 489
+_g 486
+f 461
+ar 451
+ni 450
+nd 442
+an_ 440
+en_ 437
+_i 426
+he 423
+g_ 418
+_t 412
+oe 410
+at 406
+er_ 400
+om 381
+wa 378
+_a 378
+_b 377
+_k 371
+nie 371
+_he 370
+aar 355
+_ge 351
+es 351
+_ni 348
+da 346
+m_ 342
+ou 338
+it 335
+_nie 335
+d_ 332
+l_ 330
+_wa 329
+or 327
+le 326
+we 326
+ek 324
+het 321
+me 319
+_het 319
+is 318
+j 315
+at_ 311
+on 309
+se 308
+_en 298
+ma 294
+st 291
+as 280
+va 277
+_en_ 270
+re 270
+" 269
+' 265
+het_ 261
+_het_ 260
+om_ 254
+al 252
+ar_ 250
+li 248
+te_ 247
+aar_ 247
+_da 245
+u_ 242
+nde 241
+ou_ 237
+_l 231
+be 229
+_' 226
+rd 224
+_va 224
+ig 223
+ng 222
+ns 221
+ve 220
+it_ 218
+_j 216
+_me 216
+sy 215
+ke 213
+_sy 212
+aan 212
+van 212
+_in 210
+is_ 210
+in_ 208
+sy_ 206
+_sy_ 206
+'n 205
+ro 205
+ko 204
+_'n 203
+ra 203
+'n_ 203
+_'n_ 202
+so 202
+D 202
+ho 201
+rs 200
+eer 200
+ik 199
+la 198
+_te 196
+_van 196
+_ma 195
+as_ 194
+ui 194
+ver 192
+e. 192
+der 191
+to 188
+op 187
+van_ 184
+ag 184
+_ve 182
+and 180
+_van_ 178
+ha 178
+f_ 176
+ka 176
+ne 175
+_is 175
+sk 174
+e._ 174
+oor 174
+_ver 170
+ek_ 170
+_hy 170
+hy 170
+p_ 168
+_be 168
+ri 168
+ur 167
+nie_ 165
+_so 165
+_D 164
+si 164
+ll 164
+no 164
+_in_ 163
+_hy_ 162
+hy_ 162
+ed 161
+ers 160
+_r 156
+ak 156
+_ho 155
+_nie_ 153
+eg 153
+nt 152
+de_ 152
+_p 151
+_we 148
+_is_ 148
+ei 147
+es_ 142
+maa 142
+wee 142
+na 141
+nder 139
+a_ 138
+ing 138
+ew 138
+S 135
+lle 135
+_om 135
+_te_ 134
+eu 134
+ie. 134
+wo 132
+em 132
+wat 131
+_no 130
+_" 130
+vo 130
+E 129
+H 128
+_wat 127
+ti 126
+mo 126
+A 126
+e, 126
+_ha 125
+vi 125
+el_ 125
+ter 125
+e,_ 124
+dat 124
+eer_ 124
+wat_ 124
+le_ 124
+ta 124
+Di 123
+dat_ 123
+_wat_ 122
+ie._ 122
+was 121
+ste 121
+_H 121
+_se 121
+se_ 120
+ul 120
+al_ 120
+_was 120
+_om_ 119
+_st 119
+lik 118
+"_ 118
+_ko 118
+_maa 118
+lo 117
+_to 117
+ns_ 115
+aan_ 115
+nie. 114
+_vi 114
+met 114
+_nie. 111
+nk 110
+_Di 110
+- 110
+_op 109
+_oo 109
+_on 108
+ir 108
+ord 108
+uit 106
+ens 105
+_was_ 105
+was_ 105
+een 105
+_met 105
+os 105
+_S 104
+nie._ 104
+ig_ 103
+_sk 102
+op_ 101
+_ek 101
+_wee 101
+ir_ 101
+met_ 100
+_met_ 100
+rt 100
+ik_ 99
+end 99
+nd_ 99
+gt 99
+ond 98
+ot 98
+_aa 97
+og 97
+vir_ 95
+vir 95
+_ka 94
+hu 94
+_mo 94
+_vir_ 94
+_vir 94
+_dit 93
+kr 93
+am 93
+ol 93
+dit 93
+_ek_ 93
+ki 93
+sa 93
+_aan 92
+man 92
+jy 92
+ng_ 92
+aak 92
+lle_ 91
+_hu 91
+_na 91
+_vo 90
+ewe 90
+of 90
+jy_ 90
+_dit_ 90
+dit_ 90
+_jy 89
+der_ 89
+jo 89
+_f 88
+_u 88
+sie 87
+_dat 87
+_jy_ 87
+daa 87
+do 87
+vr 87
+wi 86
+ry 86
+_dat_ 86
+eur 86
+rs_ 85
+_jo 85
+_wo 84
+_ne 84
+jie 84
+ji 84
+pe 83
+moe 83
+my 82
+ull 82
+Die 81
+maar 81
+_hom 81
+ulle 81
+_maar 81
+hom 81
+_uit 80
+_ui 80
+ges 80
+raa 80
+or_ 80
+ies 80
+jou 79
+_la 79
+maar_ 79
+ulle_ 79
+_daa 79
+Die_ 79
+daar 78
+_daar 78
+ien 78
+_my 78
+_jou 78
+ok 78
+il 78
+lik_ 77
+sta 77
+_Die 77
+ur_ 77
+ga 77
+ag_ 77
+kan 77
diff --git a/libtextcat/data/new_fingerprints/lm/albanian.lm b/libtextcat/data/new_fingerprints/lm/albanian.lm
new file mode 100644
index 000000000000..0665a962d018
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/albanian.lm
@@ -0,0 +1,400 @@
+_ 19480
+ë 4099
+e 4082
+t 3635
+i 3134
+a 2893
+r 2820
+n 2610
+s 2380
+h 2060
+ë_ 2055
+e_ 1825
+j 1677
+u 1489
+d 1381
+o 1370
+m 1318
+k 1264
+të 1091
+p 1072
+_t 1068
+sh 998
+l 936
+_n 876
+a_ 822
+, 816
+,_ 808
+të_ 795
+i_ 770
+_p 739
+_m 702
+_s 700
+te 653
+ër 620
+_d 613
+_e 607
+g 602
+_k 601
+_të 593
+. 575
+_të_ 574
+v 567
+_e_ 554
+r_ 525
+._ 523
+ht 503
+n_ 480
+he 473
+në 462
+sht 461
+te_ 457
+q 454
+nd 436
+ri 432
+is 414
+et 403
+b 402
+je 401
+me 395
+in 391
+it 381
+rë 374
+_a 374
+t_ 359
+ur 353
+_i 346
+ar 342
+ës 339
+er 338
+në_ 338
+ën 338
+dh 337
+en 336
+pë 334
+f 328
+_v 323
+jë 318
+nj 313
+ish 312
+për 294
+y 285
+z 282
+es 281
+at 274
+_me 273
+_q 273
+gj 269
+ra 261
+as 258
+_në 256
+ku 256
+j_ 250
+ta 249
+re 246
+një 245
+o_ 243
+ni 243
+_pë 240
+hte 240
+_nj 239
+on 239
+isht 236
+pa 234
+th 233
+shte 233
+_për 232
+se 228
+_g 223
+ve 221
+in_ 220
+s_ 219
+_në_ 219
+do 218
+hte_ 218
+më 216
+ti 215
+aj 212
+shte_ 212
+ej 212
+u_ 211
+që 211
+_sh 210
+nt 207
+jë_ 206
+_b 205
+_një 203
+di 202
+_pa 201
+_i_ 201
+ll 199
+_f 199
+kë 198
+me_ 197
+dhe 195
+ishte 195
+si 194
+hi 191
+he_ 188
+- 187
+ja 187
+_që 187
+ua 186
+il 184
+_dh 184
+ur_ 183
+ër_ 182
+or 180
+se_ 179
+që_ 178
+S 176
+ç 175
+_h 173
+an 172
+një_ 172
+ng 170
+nte 170
+_që_ 169
+_S 169
+rë_ 166
+dhe_ 165
+_me_ 164
+ka 162
+im 159
+hë 158
+mi 157
+to 156
+tu 156
+ën_ 155
+_një_ 154
+ha 153
+nte_ 150
+tr 148
+sa 148
+ët 148
+_gj 148
+un 147
+rr 147
+ë, 147
+_dhe 147
+ej_ 147
+ki 146
+ë,_ 146
+_ku 145
+_- 144
+_ng 142
+ik 141
+_nd 140
+end 138
+uk 137
+etë 135
+ko 135
+_dhe_ 135
+_ve 132
+va 131
+_l 131
+për_ 131
+shi 131
+erë 129
+ke 127
+kis 127
+së 126
+jo 125
+li 124
+ga 124
+kish 123
+_ki 122
+po 122
+_se 122
+' 121
+du 120
+mb 120
+_më 119
+Si 115
+më_ 115
+esh 115
+_si 114
+qe 114
+lë 114
+_kis 113
+oh 113
+_kish 113
+_Si 113
+pr 112
+_u 112
+uar 111
+de 111
+hu 111
+_th 111
+al 111
+ta_ 109
+ilv 108
+Sil 108
+Silv 108
+lv 108
+k_ 108
+e, 108
+ji 107
+e,_ 106
+_Sil 106
+_Silv 106
+_r 105
+os 104
+_se_ 104
+kisht 102
+_di 102
+st 101
+_për_ 101
+bë 101
+tj 100
+_nga 99
+nga 99
+_du 98
+ra_ 98
+vë 98
+gji 98
+_ish 96
+rt 96
+_is 96
+ro 95
+ir 94
+ga_ 94
+ësh 94
+ont 93
+c 93
+t, 93
+t,_ 93
+hin 92
+a, 92
+_at 92
+und 92
+jt 91
+_mb 91
+a,_ 91
+tje 90
+_nga_ 90
+_do 90
+_pr 90
+rit 90
+men 90
+nga_ 90
+ri_ 89
+N 89
+ma 89
+it_ 88
+_kë 88
+-_ 88
+m_ 87
+jo_ 87
+onte 87
+atë 87
+la 87
+ëri 87
+ilva 86
+shin 86
+ë. 86
+Silva 86
+lva 86
+së_ 85
+jer 85
+et_ 85
+_po 85
+ës_ 84
+kur 84
+ru 84
+nin 83
+ot 83
+hin_ 83
+_N 83
+her 83
+htë 82
+ap 82
+shin_ 82
+mo 81
+ash 81
+tha 81
+_ç 81
+ë._ 81
+ëm 81
+jit 80
+_ta 80
+ul 80
+le 80
+ho 80
+_z 79
+dr 78
+jet 78
+nin_ 78
+_më_ 78
+gjit 78
+A 78
+hk 78
+onte_ 78
+oni 77
+lo 77
+ba 77
+herë 77
+ndo 76
+shk 76
+mend 75
+_vë 75
+ha_ 75
+dë 75
+tur 74
+_A 74
+el 74
+bi 74
+_ko 74
+uk_ 73
+erë_ 73
+si_ 73
+_sa 73
+ar_ 72
+P 72
+rs 72
+pas 72
+ith 72
+uar_ 71
+_isht 71
+ai 70
+e. 70
+_vet 70
+vet 70
+_bë 70
+zi 70
+d_ 70
+jith 70
+da 70
+gjith 69
+duk 69
+na 69
+hej 69
+tër 68
+_men 68
+_ka 68
+am 68
+nd_ 68
+_c 67
+_pas 67
+_duk 67
+jes 67
+ak 67
+s, 67
+e._ 67
+s,_ 67
+K 67
+ësht 67
+mu 66
+kur_ 66
+yr 66
+em 65
+_së 65
+tha_ 65
+imi 65
+ie 65
+hej_ 64
+_së_ 64
+_u_ 64
+? 64
+fu 64
+_P 64
diff --git a/libtextcat/data/new_fingerprints/lm/amharic_utf.lm b/libtextcat/data/new_fingerprints/lm/amharic_utf.lm
new file mode 100644
index 000000000000..0c5bc813e663
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/amharic_utf.lm
@@ -0,0 +1,400 @@
+á 21403
+_ 10092
+ˆ 7734
+ሠ6558
+_á 5003
+‹ 4717
+‰ 4401
+በ4274
+á‹ 4176
+Š 4054
+አ3868
+ 2728
+Π1656
+ጠ1591
+µ 1579
+  1425
+ á 1402
+_አ1261
+_á‹ 1231
+¨ 1217
+á 1187
+¨á 1183
+_ሠ1160
+• 1145
+ˆá 1123
+­ 1097
+ን 1043
+Š• 1043
+° 1041
+°á 1004
+_በ991
+á 936
+« 880
+‹ 855
+á‹ 855
+¥ 849
+µá 805
+‰µ 783
+ት 783
+µ_ 763
+«á 709
+¥á 704
+‰  682
+በ 682
+­á 679
+˜ 670
+•á 667
+በá 666
+‰ á 666
+˜á 658
+ 643
+የ 637
+ܬ 637
+‹¨á 627
+የá 627
+ለ 614
+ˆˆ 614
+ˆ­ 611
+ር 611
+_ 588
+‰µ_ 583
+ት_ 583
+_የ 577
+_የá 574
+ለá 573
+ˆˆá 573
+ንá 570
+Š•á 570
+መ 563
+ˆ˜ 563
+ˆ˜á 557
+መá 557
+Š  554
+አ 554
+አá 553
+ተ 553
+Š á 553
+‰° 553
+ተá 547
+‰°á 547
+ሠ534
+ˆ 534
+ áˆ 532
+- 531
+ስ 525
+ˆµ 525
+-- 521
+ሠ515
+ˆ 515
+--- 512
+---- 503
+_በ 499
+----- 494
+_በá 487
+‹á 479
+¨áˆ 477
+•_ 473
+‹á 469
+á‹á 469
+ን_ 468
+Š•_ 468
+¢ 465
+³ 464
+á‹« 457
+‹« 457
+°áˆ 444
+_አ 424
+_አá 424
+ስá 423
+ˆµá 423
+­_ 415
+³á 402
+½ 401
+ 390
+ˆáˆ 389
+› 382
+‹_ 378
+á‹_ 378
+á 365
+‹«á 364
+á‹«á 364
+“ 363
+áˆá 357
+ˆá 357
+£ 356
+¢_ 351
+‰½ 347
+ች 347
+Š 341
+አ341
+á‹­ 337
+¢ 337
+ᢠ337
+‹­ 337
+¢_ 337
+á¢_ 337
+á 336
+… 334
+Š¥ 320
+እ 320
+£á 320
+ሠ320
+Š¥á 318
+እá 318
+ˆ­_ 314
+ር_ 314
+•á‹ 312
+݇ 311
+¨á‰ 301
+ና 300
+ገ 300
+Š“ 300
+Œˆ 300
+ á‰ 299
+˜áˆ 297
+ርá 294
+Π294
+ጠ294
+ˆ­á 294
+š 293
+ˆá 291
+ከ 291
+Œˆá 291
+Šá 291
+Š¨ 291
+áˆá 291
+ገá 291
+áŠá 291
+á‹° 288
+‹° 288
+_እ 285
+_እá 283
+® 279
+Š¨á 279
+ከá 279
+‰ áˆ 279
+በሠ279
+ንዠ276
+šá 276
+Š•á‹ 276
+_ጠ272
+‰¥ 270
+ብ 270
+_ 269
+áŒá 264
+· 264
+ˆ˜áˆ 262
+¥áˆ 262
+መሠ262
+¥áŠ 262
+á‹­á 261
+‹­á 261
+ˆ› 260
+ማ 260
+á‹°á 259
+‹°á 259
+ራ 254
+‰£ 254
+ባ 254
+ˆ« 254
+€ 253
+®á 249
+á 247
+ۇ 245
+µáˆ 244
+ላ 242
+ˆ‹ 242
+የሠ242
+‹¨áˆ 242
+ማá 238
+ˆ›á 238
+ áŠ 237
+ረ 237
+ˆ¨ 237
+‰°áˆ 236
+ተሠ236
+áˆ_ 235
+ˆ_ 235
+ˆá‹ 234
+­áˆ 233
+«áˆ 233
+䣇 230
+ˆš 230
+ባá 230
+ሚ 230
+ድ 228
+‹µ 228
+_መ 227
+በ227
+_መá 226
+ˆ¨á 225
+ረá 225
+² 225
+ᢠ222
+á¢_ 222
+ á‹ 216
+እአ214
+ሚá 214
+Š¥áŠ 214
+ˆšá 214
+…á 213
+²á 212
+«á‹ 210
+ˆ‹á 209
+ላá 209
+© 208
+ˆá‰ 207
+‹áˆ 206
+½_ 206
+䴇 205
+ብá 205
+አ202
+ˆ° 200
+ታ 200
+‰³ 200
+ሰ 200
+ˆ«á 199
+ራá 199
+ሰá 198
+ˆ°á 198
+ትá 195
+ወ 195
+䵇 195
+‹ˆ 195
+‹ˆá 194
+ወá 194
+½á 191
+Ž 191
+‰½_ 189
+ች_ 189
+“á 188
+¸á 186
+¸ 186
+_ለ 184
+_ለá 183
+ለሠ183
+ˆˆáˆ 183
+¥áŠ• 180
+‹¨á‰ 179
+የበ179
+Žá 178
+± 177
+¥áŠ•á 177
+_ከ 175
+δ 174
+ጥ 174
+Š áˆ 172
+አሠ172
+_ከá 170
+† 170
+«_ 169
+Š¥áŠ• 169
+†á 166
+°á‹ 166
+áˆá 165
+“_ 165
+ˆá 165
+ና_ 163
+Š“_ 163
+‰¸ 160
+ቸ 160
+ቸá 160
+‰¸á 160
+ˆáŠ 160
+¨áŠ 159
+¸á‹ 159
+‰€ 158
+ቀ 158
+­á‰ 158
+£áˆ 156
+ቀá 155
+‰€á 155
+ችá 154
+‰¸á‹ 154
+áˆ_ 154
+‰½á 154
+¸á‹ 154
+ቸዠ154
+ˆ_ 154
+µá‰ 152
+³á‹ 151
+‰¸á‹ 151
+Šá 150
+_አ150
+¶ 150
+_áŠá 150
+ƒ 150
+_á‹­á 150
+_á‹­ 150
+‰³á 149
+ታá 149
+ˆá¢ 148
+‰ á‰ 147
+ደሠ147
+‹°áˆ 147
+በበ147
+ 146
+_ተ 146
+_ተá 146
+ለዠ145
+ˆˆá‹ 145
+ድá 144
+µáŠ 144
+›áˆ 144
+‹µá 144
+ቅ 143
+‰… 143
+¥_ 143
+áŒáˆ 142
+Œáˆ 142
+‹³ 141
+ዳ 141
+Žá‰ 139
+ህ 138
+ˆ… 138
+Š“á 137
+ናá 137
+«á‰ 137
+ን 136
+ጠ136
+አዠ135
+Š á‹ 135
+†áŠ 135
+ስበ134
+ˆµá‰ 134
+ጠ 133
+ዳá 133
+Œ  133
+ሆá 133
+ˆ†á 133
+ሆ 133
+‹³á 133
+ˆ† 133
+ሆአ132
+ˆ†áŠ 132
+¨á‰° 131
+ያሠ131
+¨á‰°á 131
+‹«áˆ 131
+á‹ 129
+µá‹ 128
+Š­ 128
+ክ 128
+á 128
+ 128
+°á‰ 127
+Œ á 127
+ጠá 127
+Š« 126
+ካ 126
+á 124
+በአ123
+°áŒ 123
+á‹«á‹ 123
+‹«á‹ 123
+‰ áŠ 123
+Š¨áˆ 122
+ከሠ122
+Œˆáˆ 121
+ገሠ121
diff --git a/libtextcat/data/new_fingerprints/lm/arabic.lm b/libtextcat/data/new_fingerprints/lm/arabic.lm
new file mode 100644
index 000000000000..85f701965e2e
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/arabic.lm
@@ -0,0 +1,400 @@
+_
+ا
+Ù„
+Ùˆ
+ال
+_ا
+ÙŠ
+Ù†
+Ù…
+_ال
+ر
+ب
+.
+ت
+د
+ع
+Ù‡
+_Ùˆ
+Ù†_
+ا_
+Ùƒ
+ج
+..
+Ø©
+Ø­
+Ø£
+س
+_Ù…
+._
+Ù‚
+Ø©_
+Ù‡_
+لا
+Ù’
+_Ø£
+ان
+_Ù
+_ب
+ÙŽ
+لم
+د_
+ول
+ÙŠ_
+Ù‰
+Ù‰_
+...
+وج
+_Ù„
+_ع
+Ù„_
+وا
+جو
+Ù’.
+ص
+الم
+_الم
+..._
+.._
+Ø«
+ود
+Ø°
+Ø´
+من
+وجو
+ÙŽ_
+ÙÙŠ
+لا_
+جود
+ر_
+لى_
+لى
+ان_
+وجود
+لو
+Ù…_
+_ت
+_من
+Ù’...
+_وا
+لع
+الو
+عل
+Ù’..._
+Ù’..
+ين
+الع
+_ÙÙŠ
+ز
+ات
+_ÙŠ
+_الع
+Ù_
+_Ùƒ
+_الو
+من_
+_ان
+مر
+Ø¡
+ÙÙŠ_
+يا
+ب_
+را
+،_
+Ù_
+،
+ض
+_ÙÙŠ_
+تب
+_من_
+لوج
+كا
+لي
+ت_
+لوجو
+Ù‘
+ون
+الوج
+اء
+جود_
+أح
+_أح
+الوجو
+له
+ود_
+ها
+حا
+ذا
+_ر
+على_
+وجود_
+على
+رب
+لوجود
+عر
+_ان_
+او
+اول
+Ø·
+رت
+لت
+بْ
+أحا
+_الوج
+أحاو
+با
+وال
+_ول
+اد
+_وال
+حاول
+_أحاو
+_أحا
+أحاول
+_،_
+حاو
+_،
+ني
+بي
+_عل
+لن
+ته
+ما
+-_
+-
+مرتب
+نا
+_.
+ها_
+مرت
+_._
+_-
+_-_
+بة
+ول_
+_Ø­
+رتب
+دا
+له_
+Ø¡_
+Ùƒ_
+قي
+تبة
+اول_
+مرتبة
+ية
+بل
+ور
+ده
+الت
+Ø®
+رتبة
+الا
+رتبة_
+ين_
+عرب
+ير
+بة_
+تبة_
+قد
+ربْ
+لعربْ
+لعر
+العر
+أن
+لك
+حد
+ون_
+لعرب
+_على_
+_العر
+تÙ
+عن
+بْ.
+_لا
+حاول_
+ذات
+العرب
+_على
+ية_
+عربْ
+Ø¥
+اب
+ئ
+سا
+نو
+كو
+المر
+لل
+يت
+_Ø´
+لم_
+_المر
+اع
+مو
+لمر
+_الا
+ته_
+اج
+_Ù‚
+س_
+ائ
+جب
+ام
+اجب_
+كون
+واجب_
+Ù„ÙŽ
+_لا_
+اني
+سي
+واج
+سم
+Ù„ÙŽ_
+يس
+ال_
+_ولا
+عي
+وص
+عا
+جب_
+اس
+ير_
+_مر
+واجب
+اجب
+_بل
+الن
+ولا
+_بال
+وأ
+أع
+اك
+وق
+بلاد
+نت
+Ù†Ù
+ضا
+نه
+كون_
+بْ..
+ثل
+كل
+ولا_
+_ذا
+ذاته
+المرت
+دة
+ذاته_
+ور_
+بال
+بْ...
+_ولا_
+_الت
+يه
+_الل
+_س
+اء_
+ات_
+بلا
+_وأ
+_Ø°
+صو
+ربْ.
+_بلاد
+لاد
+_بلا
+غ
+لمرتب
+_Ù‡
+بن
+لمرت
+عربْ.
+_Ù†
+_ذات
+اته_
+لله
+Ù’._
+_با
+اته
+_Ø¥
+وم
+الل
+الوا
+موج
+_الله
+نْ
+Ù„Ù
+اÙ
+_يكو
+لر
+قا
+عين
+ست
+يكون
+موجو
+ليس
+ده_
+Ù„Ù_
+_وج
+_وص
+دي
+حم
+الواج
+بين
+_الر
+_يك
+مس
+Ù…Ù
+لله_
+Ù_
+عد
+يل
+_الن
+عق
+اش
+يكو
+يق
+الر
+تÙ_
+_كا
+شي
+_يكون
+لوا
+ار
+موجود
+يك
+هْ
+_ذاته
+ع_
+جا
+الله
+ÙÙˆ
+وب
+_عي
+رس
+دة_
+لواجب
+يكون_
+لواج
+رك
+Ù_
+كان
+لص
+لش
+لث
+زا
+ياء
+ساء
+لعق
+انت
+علم
+العق
+ما_
+قد_
+Ù„Ù
+الله_
diff --git a/libtextcat/data/new_fingerprints/lm/armenian.lm b/libtextcat/data/new_fingerprints/lm/armenian.lm
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/armenian.lm
diff --git a/libtextcat/data/new_fingerprints/lm/basque.lm b/libtextcat/data/new_fingerprints/lm/basque.lm
new file mode 100644
index 000000000000..32522a7eca20
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/basque.lm
@@ -0,0 +1,400 @@
+_ 12114
+a 5537
+e 4040
+i 3103
+t 2567
+n 2474
+r 2406
+o 1937
+k 1761
+z 1627
+u 1561
+d 1367
+l 1075
+s 1048
+b 995
+n_ 958
+a_ 957
+en 802
+g 772
+_e 736
+er 680
+ra 669
+. 625
+at 610
+_b 594
+ar 589
+an 585
+_d 580
+, 569
+,_ 569
+h 563
+ta 563
+te 555
+._ 520
+ko 513
+m 503
+o_ 486
+al 481
+en_ 471
+tz 458
+re 435
+_a 423
+ik 416
+in 409
+it 392
+za 391
+ak 388
+ba 358
+et 357
+da 352
+di 351
+ka 350
+ze 349
+ai 347
+p 340
+ri 335
+la 329
+an_ 327
+de 322
+ez 318
+na 314
+ti 307
+be 305
+i_ 302
+iz 294
+k_ 293
+tu 289
+eta 287
+( 280
+) 280
+_i 279
+or 276
+_( 270
+ko_ 269
+ek 265
+rr 264
+_h 263
+e_ 260
+es 260
+_ba 258
+nt 258
+ha 255
+_g 247
+era 246
+ia 240
+_be 234
+oa 229
+un 229
+ta_ 218
+ma 212
+on 211
+z_ 209
+du 207
+(_ 205
+_(_ 205
+: 204
+tze 204
+:_ 204
+us 191
+u_ 191
+_da 188
+ren 183
+az 183
+_) 183
+ur 182
+st 182
+ea 181
+_et 181
+eta_ 179
+zi 178
+si 177
+zen 177
+as 176
+_n 175
+go 175
+_eta 172
+il 169
+tik 169
+_z 167
+_eta_ 167
+ne 167
+bi 166
+zk 166
+sk 165
+ag 163
+t_ 162
+le 160
+rt 160
+ke 158
+- 150
+gu 148
+au 148
+tza 147
+_de 147
+ati 146
+ut 146
+A 145
+li 145
+hi 145
+E 144
+bat 144
+_o 143
+a, 142
+a,_ 142
+_m 141
+ntz 141
+ad 140
+ren_ 139
+_er 138
+io 138
+)_ 138
+_ez 138
+ir 137
+eh 137
+ab 135
+ate 135
+ak_ 134
+sa 134
+ra_ 134
+_bat 133
+nd 133
+_l 133
+ki 132
+ere 131
+ika 125
+ku 125
+f 124
+ga 123
+ld 122
+_di 121
+arr 120
+el 120
+eg 119
+uz 119
+are 116
+ng 116
+os 116
+_E 115
+gi 113
+_p 113
+_du 113
+mo 113
+zen_ 112
+ib 111
+_k 110
+ber 109
+ako 108
+_iz 107
+iza 107
+ala 107
+itz 107
+har 106
+eko 106
+adi 105
+l_ 105
+ie 105
+ste 105
+atu 104
+am 104
+ska 104
+ah 104
+_. 104
+r_ 104
+zt 103
+rri 103
+ait 103
+ua 103
+tzen 103
+ald 103
+usk 102
+aren 102
+_._ 102
+ro 102
+id 100
+la_ 99
+_s 98
+ue 98
+tek 97
+uska 97
+atz 96
+aren_ 96
+_ha 95
+rre 95
+" 94
+atik 94
+tzen_ 94
+ara 94
+in_ 93
+ni 93
+j 92
+ge 91
+ez_ 91
+ain 91
+od 91
+no 91
+na_ 91
+ri_ 90
+tan 90
+mat 90
+do 89
+_mo 88
+ho 88
+iko 88
+beh 88
+_ad 88
+al_ 87
+_iza 87
+I 87
+zan 87
+_ze 87
+_adi 87
+_A 86
+uk 86
+eha 85
+dit 85
+ru 85
+pe 85
+eko_ 84
+ela 84
+ed 84
+kar 84
+n,_ 83
+ari 83
+bil 83
+_beh 83
+bai 83
+tu_ 83
+n, 83
+ehar 83
+_izan 82
+itu 82
+_ho 82
+is 82
+ean 82
+ama 82
+izan 82
+_)_ 81
+ik_ 81
+B 80
+koa 80
+ot 80
+_na 79
+zu 79
+beha 78
+behar 78
+_t 78
+H 78
+_ko 78
+dir 77
+mati 76
+_j 75
+at_ 75
+em 74
+tika 74
+K 74
+_B 74
+zte 74
+ten 74
+_beha 74
+ter 74
+matik 73
+egi 73
+_dir 73
+a. 72
+ago 72
+kal 72
+ram 72
+ena 72
+unt 71
+lt 71
+_H 71
+su 71
+mod 71
+bo 71
+G 70
+_ber 70
+lde 70
+c 69
+te_ 69
+ar_ 69
+me 69
+ina 69
+dal 69
+ako_ 68
+L 68
+rik 68
+a._ 68
+ori 68
+ite 68
+raz 67
+alde 67
+)._ 67
+). 67
+zio 66
+_ez_ 66
+tan_ 66
+amat 66
+atika 66
+est 66
+ntza 65
+dut 65
+izk 65
+_ga 65
+ia_ 65
+s_ 65
+ita 65
+tea 64
+ei 64
+hal 64
+_mod 64
+pa 64
+bu 64
+re_ 63
+eu 63
+ert 63
+oa_ 63
+_era 63
+ten_ 63
+_G 63
+_eg 63
+rama 63
+odal 63
+ramat 63
+oda 63
+amati 62
+atze 62
+gr 62
+untz 62
+eza 62
+gra 62
+_ge 61
+gram 61
+kat 61
+abi 61
+_gr 60
+_gram 60
+_gra 60
+int 60
+rd 60
+_in 60
+k, 59
+_hi 59
+zko 59
+k,_ 59
+pr 59
+rab 59
+da_ 59
+ide 58
+_)._ 57
+tat 57
+ing 57
+ira 57
+tak 57
+x 57
+_). 57
+_ed 57
+_es 57
+_bi 57
+rak 56
+_- 56
diff --git a/libtextcat/data/new_fingerprints/lm/belarus.lm b/libtextcat/data/new_fingerprints/lm/belarus.lm
new file mode 100644
index 000000000000..7d58602e51c3
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/belarus.lm
@@ -0,0 +1,400 @@
+_
+а
+н
+Ñ€
+е
+i
+к
+Ñ‹
+л
+у
+Ñ‚
+д
+а_
+о
+м
+в
+ц
+з
+.
+на
+г
+п
+ка
+._
+ра
+ч
+i_
+ал
+у_
+ар
+ан
+Ñ_
+та
+,
+б
+,_
+ва
+Ñ‹_
+ш
+_Ñ
+ла
+ь
+й
+_д
+ле
+га
+_а
+ад
+_п
+ры
+да
+_у
+ен
+Ñ‚Ñ‹
+й_
+е_
+_г
+ны
+_н
+на_
+зе
+_в
+-
+_б
+аÑ
+ам
+_i
+ав
+ро
+аг
+_у_
+_з
+па
+нн
+Ñк
+Ñ…
+ÑŽ
+_м
+не
+дз
+_i_
+_к
+Ñ‹Ñ
+пр
+ÑÑ‚
+ак
+ын
+iк
+ль
+нÑ
+нi
+лi
+за
+аз
+ру
+ет
+ж
+Ñн
+ку
+лÑ
+ай
+_М
+_Ð
+_Ñ
+нÑ
+ага
+ат
+ай_
+М
+але
+га_
+ага_
+ме
+_на
+ала
+"
+_па
+Ñ‚Ñ€
+ер
+кал
+кi
+м_
+кт
+ава
+вi
+дзе
+нÑк
+ана
+Ð’
+го
+ац
+Ñц
+а.
+)
+о_
+iн
+_ад
+цы
+ца
+а._
+тар
+ма
+цi
+ау
+ь_
+Ñка
+ча
+_ка
+Ñа
+Ñ€_
+_._
+Ñк
+_га
+_Ñ€
+_Ñ‚
+_пр
+_за
+ве
+ÑÑ
+ны_
+да_
+др
+мi
+бы
+_.
+Ñ€Ñ
+ара
+он
+оу
+_да
+ек
+и
+лек
+_У
+ым
+ыл
+аÑ
+ло
+нк
+нт
+пра
+)_
+пi
+ары
+_на_
+та_
+рн
+Ñ…_
+У
+Ñi
+шы
+кр
+аÑ_
+ел
+ван
+ец
+Ñта
+чы
+_бы
+ае
+I
+_Ñ
+_П
+iка
+Ñу
+Ñд
+(
+iм
+газе
+_газ
+_газе
+_I
+азе
+_)
+вÑ
+_(
+iч
+тро
+нÑ_
+аш
+ор
+газ
+_)_
+ов
+аван
+П
+ран
+ун
+лÑ_
+ар_
+дак
+вар
+_да_
+Ñ‹Ñн
+Ñв
+черн
+_Я
+ык
+ктрон
+трон
+Ñл
+'
+ект
+ектро
+но
+нага_
+нна
+--
+iл
+ку_
+нÑу
+-_
+Ðл
+Ве
+был
+рыÑ
+онн
+_-
+ронн
+iÑ
+Ñдак
+ктр
+кта
+Ñан
+лi_
+_"
+д_
+Ñлект
+ап
+ктро
+_Ñл
+чер
+Ñлек
+зе_
+лектр
+мп
+ерн
+_Ñле
+_кал
+нÑка
+ктар
+Ñцi
+Вечер
+ечер
+ач
+ечерн
+Я
+ам_
+ектр
+Вече
+кÑ
+_--_
+дзе_
+тронн
+наг
+Ñда
+Веч
+Б
+Ñ‚Ñ‹_
+зет
+ече
+лект
+_Ðл
+еч
+ук
+ада
+рон
+_ва
+ла_
+_Ñлек
+_был
+уÑ
+--_
+нага
+че
+вары
+Ñле
+_--
+рав
+уку
+каг
+_ш
+кай
+ний_
+Ðа
+ий_
+ка.
+_ч
+ннаг
+ыц
+_Р
+руÑ
+_Ð’
+_Ð
+ÑÑ€
+Ñн
+Ñм
+_Б
+кра
+черни
+у,_
+нд
+ни
+дакт
+лару
+нц
+кага_
+_Мин
+ныÑ
+МинÑ
+нь
+акта
+лар
+_длÑ
+..
+_вар
+нiк
+i,
+_пра
+i.
+Ñнт
+Ñна
+о,
+Ñу
+(_
+бо
+ннÑ
+ць
+рни
+цц
+це
+Ñнта_
+дл
+_МинÑ
+ларуÑ
+Ñнта
+_го
+Ñ‚Ñ
+к_
+_Ñ€Ñ
+ерни
+вы
+цца
+ана_
+ронна
+не_
+ндр
+зеты_
+_Ñк
+Мин
+длÑ_
+_длÑ_
+елар
+Бел
+рыÑн
diff --git a/libtextcat/data/new_fingerprints/lm/bosnian.lm b/libtextcat/data/new_fingerprints/lm/bosnian.lm
new file mode 100644
index 000000000000..cf6b8a41ce67
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/bosnian.lm
@@ -0,0 +1,400 @@
+_ 9464
+a 2787
+i 2108
+e 2077
+o 2018
+j 1396
+n 1328
+s 1170
+u 1010
+r 988
+d 957
+a_ 889
+e_ 833
+t 774
+je 771
+k 756
+l 743
+m 702
+v 685
+p 604
+c 538
+i_ 538
+_s 522
+u_ 476
+z 463
+_p 450
+o_ 433
+,_ 381
+, 381
+_i 369
+_n 358
+b 349
+_d 349
+. 346
+na 341
+je_ 337
+._ 321
+_j 307
+g 299
+ra 292
+st 283
+ko 278
+_je 274
+ij 254
+_o 245
+ni 240
+_k 236
+an 228
+oj 227
+da 226
+_u 222
+pr 221
+no 219
+ma 219
+la 211
+ri 206
+_je_ 203
+po 203
+ci 196
+_pr 191
+os 190
+od 187
+ka 186
+im 185
+ti 184
+li 182
+vo 178
+_po 174
+ja 171
+_i_ 169
+ov 169
+al 168
+re 167
+ne 167
+m_ 164
+ta 160
+na_ 158
+ed 157
+_m 157
+_na 157
+se 156
+_u_ 154
+en 153
+ic 151
+sa 151
+_b 149
+ak 141
+va 140
+ad 137
+h 136
+ju 135
+su 134
+dj 133
+ije 133
+ar 127
+ca 127
+_z 126
+nj 125
+ji 124
+da_ 124
+_ko 123
+_da 122
+il 121
+" 120
+av 120
+_t 118
+aj 116
+ob 115
+ro 114
+am 114
+vi 114
+_su 113
+om 112
+dje 112
+za 112
+at 111
+le 111
+di 110
+su_ 107
+iz 107
+ve 107
+lj 106
+_se 105
+ev 105
+is 105
+es 103
+se_ 103
+do 101
+ih 100
+a, 99
+_su_ 99
+a,_ 99
+on 98
+bi 98
+in 97
+voj 97
+az 96
+ac 95
+_da_ 95
+la_ 95
+_r 93
+_g 93
+jev 92
+ma_ 92
+er 92
+or 91
+h_ 90
+_sa 90
+e, 90
+e,_ 90
+ba 90
+ima 89
+a. 88
+ol 88
+_do 87
+dn 87
+it 87
+ko_ 86
+ne_ 86
+ost 85
+ek 85
+to 85
+d_ 84
+as 84
+ju_ 84
+ao 84
+ih_ 84
+a._ 84
+te 83
+evo 83
+koj 83
+pri 82
+jevo 82
+ce 81
+_se_ 81
+og 80
+go 80
+jevoj 79
+de 79
+uc 79
+evoj 79
+_od 78
+_za 78
+tr 78
+S 77
+_koj 76
+ke 75
+_v 75
+ao_ 75
+_dje 74
+_bi 74
+sta 74
+_dj 74
+cij 74
+ik 74
+djev 73
+sl 73
+_djev 72
+ga 72
+djevo 72
+_ka 71
+rij 71
+_iz 71
+P 71
+_pri 70
+_a 69
+us 68
+_S 68
+mo 67
+el 67
+sk 66
+me 66
+zi 66
+ija 65
+n_ 65
+ku 64
+im_ 63
+_st 63
+ica 63
+_na_ 62
+_ne 62
+em 61
+edn 61
+jk 61
+io 61
+li_ 60
+ojk 60
+evojk 60
+_" 60
+zn 60
+vojk 60
+pro 59
+lo 59
+ije_ 59
+jed 58
+ke_ 58
+om_ 58
+jen 58
+sti 57
+_im 57
+le_ 57
+_ra 56
+e. 56
+ze 55
+_pro 55
+nu 55
+nje 55
+ti_ 55
+ec 55
+pre 55
+oc 54
+aci 54
+no_ 54
+et 54
+oji 53
+si 53
+ara 53
+ama 53
+z_ 53
+pos 52
+rad 52
+ran 52
+ima_ 52
+ru 52
+_P 52
+tu 52
+mu 51
+e._ 51
+ja_ 50
+_pre 50
+sa_ 49
+io_ 49
+od_ 48
+ni_ 48
+_nj 48
+j_ 48
+_pos 47
+_c 47
+ila 47
+K 46
+_sa_ 46
+uz 46
+N 46
+_ni 45
+zna 45
+U 45
+za_ 45
+_no 45
+ako 45
+u, 44
+lu 44
+ali 44
+u,_ 44
+sto 44
+ste 44
+ve_ 44
+ani 44
+oli 44
+aka 44
+_jed 43
+i,_ 43
+ji_ 43
+uci 43
+i, 43
+ci_ 43
+osti 43
+_N 42
+dr 42
+so 42
+ust 41
+ila_ 41
+B 41
+- 41
+red 41
+jke 41
+sv 41
+_go 41
+bar 41
+g_ 41
+est 40
+D 40
+iv 40
+vojke 40
+aju 40
+ta_ 40
+A 40
+lje 40
+jedn 40
+bil 40
+ojke 40
+ova 40
+ati 39
+_mu 39
+pa 39
+M 39
+_ba 39
+ca_ 39
+O 39
+ka_ 39
+_a_ 38
+_B 38
+_ima 38
+sn 38
+nu_ 38
+T 38
+to_ 38
+eg 38
+ava 38
+ros 37
+ir 37
+ala 37
+og_ 37
+osl 37
+ovi 37
+koji 37
+_sv 37
+dv 36
+ric 36
+_za_ 36
+br 36
+_on 36
+odi 36
+_koji 36
+_jedn 35
+nik 35
+dno 35
+_D 35
+jo 35
+tra 35
+_M 35
+sp 35
+iz_ 35
+oz 35
+vr 35
+u. 35
+eri 35
+I 35
+eko 35
+ale 35
+_ma 34
+lik 34
+_bil 34
+c_ 34
+ut 34
+je,_ 34
+u._ 34
+str 34
+je, 34
+adi 34
+tit 34
+_iz_ 34
+iti 34
+i. 33
+_rad 33
+ici 33
+rost 33
+aju_ 33
+va_ 33
+_ob 33
+nog 33
diff --git a/libtextcat/data/new_fingerprints/lm/breton.lm b/libtextcat/data/new_fingerprints/lm/breton.lm
new file mode 100644
index 000000000000..6d021d739672
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/breton.lm
@@ -0,0 +1,400 @@
+_ 21447
+e 6375
+a 5414
+n 3228
+r 3039
+o 2968
+t 2392
+i 1812
+h 1751
+u 1650
+l 1630
+d 1506
+a_ 1352
+z 1319
+t_ 1310
+_e 1168
+_a 1168
+e_ 1133
+m 1105
+s 1100
+g 1090
+r_ 998
+k 997
+n_ 958
+et 941
+v 888
+_d 868
+an 859
+. 846
+' 841
+en 836
+b 757
+, 749
+,_ 743
+._ 716
+ar 703
+ou 700
+et_ 689
+c 686
+ez 572
+'h 572
+_g 565
+er 555
+p 553
+_k 535
+c'h 530
+c' 530
+nt 513
+_h 505
+re 505
+ra 478
+ha 466
+ñ 458
+ne 456
+oa 454
+_o 442
+_b 434
+- 432
+zh 422
+ar_ 415
+_m 414
+_e_ 414
+nn 384
+el 376
+_a_ 356
+ur 350
+o_ 346
+h_ 345
+ve 340
+nt_ 340
+w 339
+ke 338
+de 333
+añ 332
+_p 332
+s_ 327
+he 325
+on 318
+le 318
+ga 316
+ma 315
+_ar 312
+eu 312
+_n 310
+an_ 298
+ant 296
+enn 285
+z_ 282
+_ar_ 281
+be 280
+_v 276
+_r 272
+al 270
+en_ 268
+_ke 267
+l_ 264
+em 264
+_c 263
+ñ_ 262
+da 262
+_s 261
+ho 260
+di 259
+_ha 252
+ll 250
+tr 248
+oa_ 247
+me 246
+us 242
+_ga 234
+la 231
+ket 227
+ant_ 219
+_da 219
+_l 216
+ur_ 216
+_oa 215
+in 214
+ket_ 211
+gan 211
+_c' 207
+_u 207
+_c'h 207
+ad 207
+añ_ 207
+ao 204
+_ma 204
+_t 204
+_ket 201
+_an 199
+_di 197
+ezh 196
+ù 196
+où 196
+_de 195
+ev 193
+? 192
+st 192
+ro 192
+P 192
+_ket_ 188
+er_ 188
+f 186
+na 186
+ue 185
+da_ 184
+?_ 184
+_gan 184
+_da_ 184
+_ne 183
+ed 182
+_P 180
+g_ 180
+pe 179
+m_ 178
+A 177
+ri 176
+us_ 175
+ta 174
+ze 174
+gant 174
+ka 174
+i_ 172
+d_ 171
+G 167
+te 167
+ae 166
+zh_ 164
+ha_ 163
+_ha_ 163
+_he 161
+_gant 159
+do 159
+oue 159
+_G 158
+eus 158
+eo 158
+'h_ 157
+_en 157
+go 157
+am 157
+c'h_ 157
+_be 156
+we 156
+iz 154
+_an_ 151
+_A 150
+eus_ 147
+sk 147
+li 146
+as 146
+_pe 146
+j 146
+_oa_ 146
+av 144
+gant_ 143
+ut 142
+no 141
+vez 140
+va 140
+_ra 140
+ge 138
+ez_ 138
+bo 137
+  137
+_ur 136
+lo 134
+he_ 134
+où_ 133
+ù_ 133
+_ur_ 132
+es 130
+'ho 129
+ni 129
+uz 129
+tra 127
+se 126
+it 125
+ra_ 125
+out 125
+is 125
+at 125
+hi 125
+eg 125
+ig 124
+ko 124
+io 123
+k_ 123
+ch 123
+_w 121
+or 121
+Pe 121
+_ma_ 119
+ma_ 119
+gw 118
+_em 118
+_Pe 118
+un 118
+eme 117
+ne_ 117
+nn_ 117
+c'ho 117
+ol 116
+ag 116
+M 115
+'ha 115
+_en_ 115
+iv 115
+vi 113
+_ka 113
+K 113
+ud 112
+_he_ 111
+ont 110
+oc 110
+vo 110
+ec 109
+wa 109
+.. 107
+_M 107
+_z 107
+br 107
+om 106
+to 105
+_f 105
+N 105
+_c'ho 104
+ti 104
+ut_ 104
+D 104
+_o_ 103
+_la 103
+_go 101
+az 101
+out_ 101
+ba 101
+enn_ 101
+c'ha 101
+our 100
+oc'h 100
+ell 100
+oc' 100
+etr 99
+el_ 99
+_K 99
+_D 99
+: 99
+:_ 99
+eve 98
+_d' 97
+all 97
+d' 97
+E 97
+_ne_ 97
+_me 95
+eo_ 95
+ak 95
+bet 95
+_eu 95
+rc 94
+_do 94
+_gw 94
+zi 93
+oz 93
+aou 93
+etra 92
+pa 91
+ab 90
+on_ 90
+ei 90
+tra_ 90
+n, 89
+zo 89
+ag_ 89
+_ev 88
+ul 88
+'e 88
+n' 88
+n,_ 88
+ouz 87
+v_ 86
+_n' 86
+_eus 84
+H 83
+za 83
+S 83
+etra_ 83
+_eo 82
+t,_ 82
+t, 82
+il 81
+ent 81
+fe 81
+rc'h 81
+rc' 81
+_eus_ 80
+ie 80
+_bo 79
+ele 79
+_ve 79
+mp 79
+_bet 78
+B 78
+it_ 77
+_vo 77
+'a 77
+n. 76
+_S 76
+hag 76
+hoa 75
+_hag 75
+len 75
+_N 75
+'hoa 74
+_E 74
+ir 74
+hag_ 74
+_hag_ 74
+mañ 74
+as_ 73
+eze 73
+ont_ 73
+_. 73
+iñ 73
+r, 72
+pr 72
+ed_ 72
+n._ 72
+re_ 72
+in_ 72
+r,_ 72
+_H 71
+'he 70
+t. 70
+gou 70
+em_ 70
+_br 70
+rae 70
+rez 69
+t._ 69
+bet_ 69
+net 69
+dr 68
+_eo_ 68
+ll_ 68
+mo 67
+po 67
+oul 67
+rou 67
+c'hoa 67
+a- 67
+vel 67
+oc'h_ 67
+nna 66
+_B 66
+met 65
+ec' 65
+ec'h 65
+R 64
+den 64
diff --git a/libtextcat/data/new_fingerprints/lm/catalan.lm b/libtextcat/data/new_fingerprints/lm/catalan.lm
new file mode 100644
index 000000000000..086a45b50c50
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/catalan.lm
@@ -0,0 +1,400 @@
+_ 16604
+e 5091
+a 4937
+s 3120
+r 3037
+i 2987
+l 2747
+n 2641
+t 2466
+o 2274
+d 1775
+c 1740
+u 1515
+a_ 1460
+s_ 1416
+_d 1116
+e_ 1067
+p 1014
+m 995
+es 985
+de 972
+_de 842
+en 700
+_l 691
+re 652
+_e 637
+de_ 611
+el 602
+_de_ 601
+_a 600
+, 599
+,_ 599
+er 598
+la 584
+ar 583
+_p 550
+l_ 547
+ci 522
+n_ 518
+es_ 515
+an 515
+ra 499
+nt 495
+ta 495
+b 476
+_c 452
+al 450
+v 440
+g 428
+. 420
+on 420
+or 404
+t_ 402
+at 396
+._ 377
+r_ 372
+i_ 371
+la_ 368
+_i 364
+_la 351
+_la_ 325
+f 325
+le 322
+te 312
+' 311
+_s 308
+st 308
+se 305
+ó 302
+ue 302
+na 301
+os 301
+qu 300
+q 300
+el_ 299
+it 292
+co 290
+ri 277
+ca 277
+ti 273
+ac 272
+in 268
+ll 264
+ic 264
+me 259
+un 257
+que 248
+C 248
+tr 244
+ns 242
+ió 238
+_i_ 236
+ad 233
+ent 232
+_el 227
+ne 226
+_t 223
+_co 221
+_m 221
+_C 217
+en_ 217
+li 217
+ia 212
+à 212
+pe 207
+_a_ 207
+pr 206
+ció 203
+ó_ 203
+pa 203
+ro 202
+o_ 198
+E 198
+di 197
+io 197
+ls 196
+h 192
+_q 192
+_qu 192
+_en 187
+aci 186
+am 186
+ec 183
+to 183
+as 180
+om 180
+ni 180
+da 179
+ió_ 179
+si 178
+ls_ 176
+L 175
+ma 172
+res 172
+ur 171
+_el_ 162
+rt 162
+ue_ 160
+A 160
+os_ 159
+_que 159
+que_ 158
+_r 158
+po 157
+_es 155
+er_ 155
+_que_ 155
+M 155
+_se 153
+va 153
+del 153
+ció_ 151
+_pr 151
+is 150
+_en_ 149
+P 147
+_pe 146
+_del 145
+ts 145
+lo 145
+_M 144
+ct 144
+_u 144
+ol 143
+ve 141
+_L 140
+x 140
+y 140
+a,_ 139
+a, 139
+nc 138
+men 137
+al_ 137
+_f 137
+_re 137
+_P 136
+ació 136
+les 136
+rs 134
+est 133
+tu 131
+_E 130
+et 130
+s,_ 129
+_un 129
+na_ 129
+_v 129
+s, 129
+ion 127
+per 126
+so 125
+em 125
+at_ 124
+no 124
+j 124
+br 123
+nt_ 122
+ar_ 122
+sa 121
+_n 119
+les_ 118
+é 118
+ce 117
+il 117
+ell 116
+_per 114
+í 114
+ob 113
+re_ 113
+ir 113
+_A 112
+ons 112
+do 112
+ua 112
+con 112
+ment 111
+gu 111
+ts_ 110
+ss 110
+ns_ 109
+ant 109
+ra_ 109
+Co 109
+par 108
+l' 107
+d' 107
+_l' 107
+els 107
+tat 107
+sc 106
+_d' 106
+an_ 105
+_Co 105
+vi 104
+els_ 104
+ica 104
+ran 103
+ul 102
+iv 102
+S 102
+_del_ 102
+del_ 102
+mb 101
+mi 101
+ita 101
+nta 100
+_pa 99
+_o 99
+_con 98
+ació_ 97
+rn 96
+_in 96
+ia_ 96
+z 96
+im 95
+rr 95
+art 94
+ta_ 93
+com 93
+tre 92
+_h 92
+s. 91
+mp 90
+ie 90
+J 90
+s._ 89
+cio 89
+_le 89
+bre 88
+_ca 88
+_al 88
+sta 88
+_com 88
+cu 88
+à_ 87
+pre 87
+fe 86
+ba 86
+tra 86
+ge 85
+pro 85
+_les 84
+des 84
+ter 84
+_po 84
+_les_ 84
+T 84
+_J 84
+nd 84
+cion 84
+_S 84
+ura 83
+nci 82
+va_ 81
+ha 81
+ona 81
+ent_ 80
+ues 80
+oc 80
+ea 80
+nte 80
+és 80
+_di 79
+ui 79
+as_ 78
+ut 78
+ici 78
+res_ 78
+us 77
+ot 77
+ara 77
+ip 75
+rm 75
+ab 75
+eg 75
+_per_ 75
+ng 75
+'a 75
+I 75
+per_ 75
+rec 74
+du 74
+_tr 74
+è 73
+cia 73
+_no 73
+b_ 73
+_par 72
+ep 72
+id 72
+lle 71
+rc 71
+_pro 70
+D 69
+G 69
+ga 69
+fo 69
+una 68
+El 68
+lit 68
+un_ 68
+ques 68
+amb 67
+ix 67
+és_ 67
+_G 67
+era 67
+cr 67
+) 66
+da_ 66
+- 66
+sp 66
+y_ 66
+ada 66
+tor 66
+( 66
+_( 66
+_T 65
+ment_ 65
+B 65
+_es_ 65
+Ma 65
+V 65
+uc 65
+ect 65
+ame 64
+iu 64
+_Ma 64
+orn 64
+_B 64
+_D 64
+Ca 64
+sti 64
+_g 63
+esc 63
+rd 63
+una_ 63
+là 62
+" 62
+ed 62
+amen 61
+mo 61
+ions 61
+_El 61
+_Ca 61
+Jo 61
+eu 61
+ari 61
+lt 61
+F 61
+u_ 61
+ament 60
+_V 60
+m_ 60
+fi 60
+au 60
+ev 60
+La 60
+itat 59
+_ha 59
diff --git a/libtextcat/data/new_fingerprints/lm/chinese_simplified.lm b/libtextcat/data/new_fingerprints/lm/chinese_simplified.lm
new file mode 100644
index 000000000000..622b89c3ba80
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/chinese_simplified.lm
@@ -0,0 +1,400 @@
+_
+,_
+_,_
+_,
+,
+çš„
+çš„_
+_çš„
+_çš„_
+_。
+。_
+。
+_。_
+国
+了_
+了
+_了_
+在
+_了
+_ï¿¥_
+_在
+_1_
+_1
+_ï¿¥
+国_
+1
+在_
+ï¿¥_
+ï¿¥
+å¹´
+1_
+_在_
+_ï¼
+_ï¼_
+部
+ï¼
+ï¼_
+5_
+æ°‘
+_5
+_5_
+5
+政
+å·¥
+ã€_
+_å¹´
+_æ—¥
+æ—¥
+_ã€
+å¹´_
+行
+_ã€_
+建
+ã€
+ï¼”
+多_
+多
+ï¼–
+百
+作
+会
+æ—¥_
+_百
+_建
+ç†
+和
+_ï¼”
+军
+_多
+_多_
+_百_
+_部
+_æ—¥_
+å…³
+_ï¼–_
+和_
+æˆ
+百_
+_ï¼–
+_和_
+_和
+ï¼”_
+è¿›
+ï¼–_
+_军
+_ï¼”_
+_万_
+ï¼™
+我
+_万
+展
+å¾—
+地
+们
+产
+举
+过
+主
+上
+ç›®
+行_
+于
+一
+万
+_国
+å…¨
+å‘
+到_
+_è¿›
+_å·¥
+_过
+人
+_ï¼™_
+_我
+到
+个_
+个
+_上
+_ï¼™
+万_
+_举
+ä¸
+部_
+_å…³
+ï¼™_
+们_
+é•¿
+_政
+å¼€
+战
+ç»
+_性
+_上_
+并
+性
+æ²»_
+大
+_工作
+_个
+æ°‘_
+_内_
+ä»–
+è¿™
+_内
+æ²»
+计
+å“
+术
+工作_
+å…±
+县
+内
+―_
+区_
+员_
+_―
+术_
+_工作_
+上_
+部门
+_部门
+作_
+_并
+_è¿™
+一_
+于_
+分
+区
+å“_
+_引
+_å¼€
+ç³»
+员
+县_
+_―_
+_ä¸
+工作
+_一_
+―
+_å‘
+_到_
+_ä»–
+_到
+_æ°‘
+主义
+_å¹´_
+_å…¨
+生
+_å…±
+_个_
+性_
+é—¨
+_性_
+地_
+å¾€
+机
+案
+引
+åˆ
+内_
+_一
+展_
+义
+府
+问题_
+里_
+常
+å–得了
+市
+期
+è¿™_
+æ•°
+é—¨_
+å°±
+è¦_
+ç­‰_
+å°
+ä»–_
+å­—
+家
+社会
+技
+å¾—_
+å°†
+_分_
+å–得了_
+è¥
+8
+女
+级
+约
+_å°†_
+ç»
+统
+厂_
+我们
+安
+å­—_
+_厂_
+大_
+#_
+_作
+å·¥_
+æ–‡
+å°†_
+æ–°
+组
+外
+_å·²_
+å·²
+产_
+_中
+_产
+_技术_
+_生产
+过_
+但
+ä»·
+ç»_
+å‡
+å·²_
+_之
+_技
+家_
+事
+ï¼’
+_举行_
+è”åˆå›½_
+中
+严
+è®­
+_å…ƒ_
+_ç­‰_
+å¾€_
+之
+_但
+_å‘展
+问题
+_机
+åŽ
+_å–得了_
+_市_
+政治_
+厂
+_战
+å–
+务
+_但_
+é•¿_
+政_
+几
+刚
+_æˆ
+_å_
+事_
+_会
+_8_
+会主义
+部门_
+_女
+_ï¼’_
+机_
+_å·²
+_â€
+ç†_
+å…¬
+å„
+技术
+å…ƒ_
+_èŒå·¥
+ç»æµŽ_
+举行
+_#_
+_军民
+_问题
+义_
+之_
+举行_
+_æ–°
+解
+_两
+_è¿™_
+得了
+_å¾—_
+_几_
+_é—®
+两_
+我们_
+进行
+å–å¾—
+_æ–‡
+_ä»–_
+_é•¿
+_è”åˆ
+_过_
+_计
+_进行_
+法国_
+建设_
+_说
+题_
+é‡
+_政治
+_市
+里
+_解
+_政府
+å£
+è¦
+_å‡
+å‘展_
+è”åˆå›½
+_组
+几_
+_我们_
+åŽ_
+_ç»
+_美
+æ¥_
+åˆå›½_
+济_
+å£_
+â€_
+å…ƒ
+#
+府_
+军民
+并_
+èŒå·¥
+_8
+æˆ_
+约_
+_ç­‰
+级_
+次_
+_社会主
+_ï¼’
+_政治_
+_â€_
+_英
+得了_
+_#
+_å‘展_
+_社
+生产
+_æ¡
+军民_
+â€
+_èŒ
+_å–得了
+_è”
+å°±_
+_我们
+å‘展
+两
+è®®
+_å°
+_è”åˆå›½
diff --git a/libtextcat/data/new_fingerprints/lm/chinese_traditional.lm b/libtextcat/data/new_fingerprints/lm/chinese_traditional.lm
new file mode 100644
index 000000000000..6708981ba6e7
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/chinese_traditional.lm
@@ -0,0 +1,400 @@
+_
+ï¹_
+_ï¹_
+ï¹
+_ï¹
+çš„
+_çš„
+çš„_
+_çš„_
+。
+_。
+。_
+_。_
+å¹´
+_會
+會_
+_會_
+å¹´_
+_å¹´_
+é›»
+會
+_å¹´
+_在
+é›»_
+_是_
+_é›»_
+一
+æ–¼
+å­¸_
+_æ–¼_
+_æ–¼
+在
+_é›»
+å­¸
+是_
+港
+_å­¸_
+在_
+_是
+æ–¼_
+_在_
+_å­¸
+是
+ä¸_
+_å
+_ä¸
+_港_
+_港
+_ä¸_
+å_
+_一
+å“¡
+å“¡_
+_一_
+ä¸
+_å_
+一_
+港_
+_ç½²
+_å“¡_
+_å“¡
+_業
+業
+_ç½²_
+業_
+_業_
+ç½²_
+ç½²
+_事
+_åŒ
+æ©Ÿ
+_國_
+,
+ã€
+_,_
+_å…§
+ã€_
+_外_
+_åŒ_
+_政_
+_å…¬_
+為
+外
+å…¬
+_å…¬
+有
+_å…§_
+åŒ
+_政
+國_
+國
+å…¬_
+香
+_事_
+外_
+事
+å…§_
+_,
+_ã€
+政
+_外
+_ã€_
+香_
+政_
+å…§
+åŒ_
+事_
+,_
+_國
+為_
+_é•·_
+_較_
+_æ–¹_
+_用_
+ç­‰_
+ç²
+錦
+_æ–‡_
+_中
+_錦
+用
+_通
+_較
+_ç­‰
+有_
+_é•·
+_è·¯
+通
+_è­¦_
+_åŠ
+_åŠ_
+_è­¦
+_香
+錦_
+_訴
+中_
+_話
+_有_
+通_
+時_
+ç²_
+_予
+è·¯_
+較_
+æ–¹_
+ç”°
+è­¦
+æ©Ÿ_
+_訴_
+_ç”°_
+ç­‰
+_有
+話_
+_æ©Ÿ_
+_予_
+è·¯
+時
+æ–¹
+æ—¥
+è­¦_
+_通_
+訴_
+_用
+_æ–¹
+åŠ_
+ç”°_
+_話_
+_中_
+_香_
+訴
+_ç”°
+_æ–‡
+_錦_
+三
+較
+用_
+_時_
+中
+_ç­‰_
+åŠ
+話
+_æ©Ÿ
+_ç²
+ä¹
+æ–‡
+_時
+_è·¯_
+_ç²_
+é•·
+_為_
+_為
+予
+é•·_
+予_
+æ–‡_
+_æ—¥_
+ä¹_
+途_
+_高
+è£_
+æ•…_
+_室
+_發_
+_控_
+å‰_
+回:
+_查_
+_三
+_安
+_以
+_控
+途
+_安_
+_è£
+_士_
+_雇
+_大_
+_上_
+_èµ·
+出_
+_三_
+_人_
+_èµ·_
+室_
+_人
+_分_
+_至_
+上_
+_出_
+第
+_*
+至
+_查
+程
+_ç…™_
+_生_
+_以_
+_åª
+者
+_å¯
+三_
+_*_
+分_
+_途
+_å‰
+_分
+_ç·š_
+_出
+發
+_與_
+生
+ç”±
+å¯_
+ç…™_
+_ç”±_
+_ç·š
+者_
+至_
+æ—¥_
+åª_
+ç…™
+發_
+_使
+_使_
+查
+_第
+_雇_
+以_
+_åª_
+控_
+安_
+*
+與_
+ç·š
+_與
+_至
+改
+人_
+大_
+_上
+_高_
+æ•…
+èµ·_
+高
+ç·š_
+大
+控
+士
+_æ•…_
+_作_
+使
+_ç”±
+ç”±_
+_途_
+安
+作
+*_
+以
+_生
+來
+_大
+回
+_æ—¥
+_å‰_
+å››
+_ä¹_
+_ä¹
+人
+_è£_
+上
+_發
+_作
+士_
+_æ•…
+作_
+高_
+雇
+åª
+å¯
+:
+è£
+使_
+èµ·
+出
+å‰
+_ç…™
+_å¯_
+雇_
+_室_
+生_
+å…«
+_士
+查_
+分
+室
+與
+也_
+肇_
+和_
+_物_
+_如_
+_霧_
+主_
+_商_
+ç´„
+_府_
+到_
+é”
+é“
+都_
+黨_
+éŽ
+_é‡_
+_者_
+未_
+_ä¿®_
+_ç´„_
+別_
+郵_
+_能_
+來_
+_霧
+_車
+ï¼
+_å¸_
+_排
+_該_
+_柙_
+被_
+總_
+_越
+å‘Ž_
+_資
+_排_
+期_
+_夜_
+_ä¿®
+å·´_
+_育_
+_歌_
+_號_
+_åœ_
+_調
+_亦_
+_æ ¡
+_)
+_後_
+å…¥_
+_便
+_訊
+_期_
+脹_
+_而_
+_é‡
+o
+_該
+_o
+_(
+程_
diff --git a/libtextcat/data/new_fingerprints/lm/croatian.lm b/libtextcat/data/new_fingerprints/lm/croatian.lm
new file mode 100644
index 000000000000..b054ac34ab89
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/croatian.lm
@@ -0,0 +1,400 @@
+_ 36598
+a 9456
+o 9050
+i 8526
+e 7955
+n 5513
+j 5379
+s 4971
+t 4125
+r 3889
+u 3423
+je 3281
+l 3231
+e_ 3177
+d 3128
+k 2992
+v 2891
+a_ 2835
+o_ 2679
+m 2645
+p 2316
+_s 2212
+i_ 2205
+y 2174
+c 1894
+z 1867
+je_ 1613
+_n 1593
+g 1581
+_p 1490
+b 1386
+u_ 1382
+, 1208
+,_ 1208
+st 1203
+_j 1202
+na 1159
+_je 1118
+_d 1105
+_i 1104
+. 1069
+._ 1058
+ra 1017
+ko 981
+ni 980
+_je_ 918
+ij 914
+ti 911
+no 891
+da 884
+to 879
+_k 863
+_o 856
+cy 839
+li 838
+ne 790
+sy 786
+- 785
+pr 760
+_u 748
+po 734
+ta 711
+_b 705
+_t 704
+la 699
+ja 699
+an 691
+m_ 680
+ov 674
+ije 673
+_z 669
+ka 662
+ri 652
+lo 651
+vo 643
+re 641
+is 626
+in 623
+se 623
+va 622
+_m 617
+oj 614
+_po 594
+_pr 592
+bi 588
+en 582
+il 578
+os 576
+vi 572
+nj 569
+ak 564
+_na 558
+im 558
+da_ 553
+od 550
+ao 550
+al 549
+om 545
+na_ 533
+_se 524
+_i_ 523
+h 517
+_g 511
+ma 508
+at 502
+cj 501
+og 497
+-- 493
+ro 493
+on 485
+av 480
+_da 474
+_ne 474
+ed 472
+zy 471
+se_ 466
+_se_ 458
+_bi 457
+ao_ 452
+ad 452
+lj 445
+es 444
+ji 441
+_v 433
+_c 430
+za 421
+go 418
+_r 413
+_u_ 411
+yi 410
+ek 406
+di 403
+sa 396
+et 393
+ic 391
+io 390
+_da_ 389
+to_ 389
+as 386
+_ko 383
+ye 378
+ar 378
+mo 373
+le 367
+or 364
+ju 363
+el 354
+tr 354
+io_ 352
+ve 345
+lo_ 344
+su 344
+ol 344
+am 344
+iz 343
+li_ 338
+ti_ 336
+sto 323
+n_ 321
+it 319
+_za 319
+sta 317
+_ni 316
+te 312
+nu 312
+a, 309
+a,_ 309
+ya 309
+do 304
+om_ 303
+la_ 300
+no_ 299
+_na_ 299
+d_ 297
+ko_ 296
+aj 294
+ik 292
+ru 291
+ga 291
+em 288
+nje 283
+dj 281
+ne_ 281
+k_ 277
+_st 276
+koj 276
+ec 274
+_ka 272
+_su 270
+ob 265
+-_ 263
+az 261
+sv 260
+_koj 260
+im_ 260
+ije_ 259
+pa 258
+ot 257
+yt 256
+ok 255
+su_ 255
+ih 254
+me 253
+dn 253
+_cy 253
+iv 251
+syt 248
+g_ 247
+--_ 246
+_- 246
+kr 246
+--- 246
+a. 245
+e,_ 245
+---_ 245
+e, 245
+er 245
+a._ 244
+_iz 244
+mi 243
+_---_ 242
+cyi 242
+_-- 242
+_--- 242
+jed 240
+h_ 239
+_a 239
+_sa 237
+j_ 236
+_l 231
+_sv 229
+_to 229
+sk 228
+ih_ 224
+ja_ 223
+pro 223
+yn 222
+t_ 222
+ost 221
+_do 221
+oc 219
+gl 218
+_su_ 216
+og_ 216
+uc 214
+s_ 214
+bil 213
+oz 213
+ki 212
+ni_ 212
+nij 209
+ako 208
+eg 208
+ut 205
+pre 205
+ci 204
+ji_ 203
+_od 203
+ilo 202
+ati 202
+ac 201
+ns 200
+_mo 197
+rij 196
+bo 195
+ovo 195
+ku 195
+dje 194
+ma_ 192
+_bil 191
+cje 186
+sti 186
+_go 186
+de 185
+sl 183
+_pro 182
+ju_ 182
+nije 181
+tv 180
+lje 179
+isy 179
+pri 178
+_pre 177
+dr 177
+e._ 177
+e. 177
+op 176
+ima 176
+anj 175
+jen 175
+us 172
+ilo_ 172
+_ra 170
+S 167
+ecj 166
+iti 166
+sp 163
+_S 161
+vr 161
+i. 161
+i._ 161
+zn 161
+ali 161
+i,_ 160
+i, 160
+ap 157
+nije_ 157
+nst 156
+pi 156
+ga_ 156
+_sy 155
+_nj 155
+jes 155
+ran 155
+vo_ 155
+yto 154
+ev 153
+_to_ 152
+_pri 151
+est 150
+N 150
+ins 150
+ist 149
+ir 149
+o, 148
+vj 148
+vje 148
+o,_ 148
+B 148
+zi 147
+jec 147
+gov 147
+yto_ 147
+syto 146
+ton 146
+od_ 145
+O 144
+rije 144
+lik 143
+on_ 142
+ocy 142
+W 142
+ba 142
+_W 141
+kao 141
+Wi 141
+_N 141
+inst 141
+_nij 141
+_nije 140
+_Wi 140
+syto_ 140
+lic 139
+P 139
+ovi 138
+_tr 138
+rs 137
+ez 137
+edn 136
+_P 136
+si 136
+ili 136
+du 136
+Winst 135
+cye 135
+nston 135
+ston 135
+ud 135
+kao_ 135
+Wins 135
+insto 135
+oji 135
+nsto 135
+Win 135
+raz 135
+zye 135
+_Win 134
+ova 134
+_Wins 134
+_on 133
+ako_ 133
+odi 133
+cya 133
+ila 133
+icy 133
+oj_ 133
+ke 133
+va_ 132
+ija 132
+_is 132
+jel 132
+oje 130
+pu 130
+cje_ 130
+bi_ 129
+rat 128
+ce 128
+tu 128
+mu 128
+ve_ 127
diff --git a/libtextcat/data/new_fingerprints/lm/czech.lm b/libtextcat/data/new_fingerprints/lm/czech.lm
new file mode 100644
index 000000000000..097bbc5b2241
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/czech.lm
@@ -0,0 +1,400 @@
+_ 26378
+o 5870
+e 5354
+a 4740
+n 4462
+t 3745
+s 3438
+i 3187
+v 3118
+l 2891
+r 2754
+k 2566
+d 2508
+m 2198
+u 2197
+p 2032
+í 1924
+c 1678
+h 1572
+z 1544
+á 1522
+_p 1299
+e_ 1266
+y 1231
+a_ 1219
+j 1219
+_s 1156
+b 1079
+o_ 1065
+Ä› 1043
+_v 1038
+, 952
+,_ 946
+st 945
+_n 911
+é 878
+. 870
+í_ 848
+Å™ 822
+._ 803
+ní 782
+ov 757
+_z 723
+i_ 716
+u_ 684
+ro 674
+en 656
+ý 647
+ž 639
+po 630
+ch 629
+Ä 625
+na 599
+_a 598
+sk 592
+Å¡ 563
+ho 552
+_d 551
+ra 545
+m_ 539
+y_ 530
+_t 512
+ko 504
+_k 503
+le 502
+_j 501
+_o 486
+to 479
+pr 471
+ne 468
+ní_ 462
+je 458
+é_ 456
+ti 455
+od 433
+li 432
+va 432
+_po 429
+_m 428
+al 424
+te 424
+ou 423
+ed 418
+se 415
+la 410
+no 397
+os 382
+lo 377
+an 376
+_pr 375
+ů 375
+v_ 372
+ře 370
+_a_ 364
+em 363
+at 360
+ta 359
+do 357
+t_ 357
+_b 355
+or 349
+h_ 345
+_v_ 338
+ch_ 327
+S 325
+ce 323
+av 323
+pÅ™ 322
+ni 319
+ké 316
+er 315
+nÄ› 315
+_na 313
+na_ 312
+_ne 311
+de 308
+ic 307
+in 306
+_se 306
+l_ 304
+dn 302
+za 298
+_pÅ™ 293
+ě_ 291
+ol 290
+_je 281
+ob 280
+is 277
+ve 274
+ho_ 272
+es 270
+ot 268
+ak 265
+vo 263
+ná 260
+il 257
+se_ 257
+it 256
+et 253
+ad 250
+by 249
+P 242
+_r 242
+k_ 242
+ost 241
+_se_ 241
+tr 238
+me 237
+pro 234
+že 234
+ka 230
+_za 227
+om 224
+el 223
+_P 223
+on 218
+_pro 216
+ou_ 216
+tu 215
+O 212
+mi 212
+ku 211
+_u 210
+_do 208
+_l 207
+_na_ 206
+N 205
+ské 205
+ím 205
+íc 205
+ý_ 203
+mÄ› 203
+_S 200
+oz 200
+V 200
+ze 198
+da 194
+sl 192
+á_ 191
+ova 190
+mo 190
+re 189
+so 187
+vy 186
+ej 185
+rá 184
+ar 184
+s_ 183
+vÄ› 183
+A 181
+ru 180
+_st 178
+f 178
+éh 177
+ého 176
+kt 176
+tn 175
+g 174
+bo 174
+ez 173
+ci 172
+ký 172
+né 170
+M 170
+án 169
+as 168
+vi 167
+ři 167
+ac 166
+že_ 165
+ýc 165
+ž_ 165
+ck 164
+K 164
+B 164
+ých 164
+vá 162
+_c 162
+ého_ 162
+ení 161
+lá 160
+_ž 160
+ří 160
+pře 159
+ec 158
+ů_ 157
+J 156
+vn 156
+_h 155
+ké_ 155
+ok 154
+sta 154
+to_ 152
+vý 152
+nt 151
+ých_ 149
+lo_ 149
+_by 149
+dy 149
+_pře 148
+ce_ 147
+R 146
+n_ 146
+ád 146
+pa 145
+vé 145
+am 145
+mu 145
+ný 145
+ud 144
+_Ä 144
+_B 142
+ter 141
+ně_ 141
+Äe 140
+Å¡e 140
+_V 140
+_ko 140
+li_ 139
+dÄ› 137
+hl 137
+je_ 137
+ji 137
+ist 135
+jí 135
+- 134
+ik 133
+si 133
+ál 132
+em_ 132
+_to 132
+_vy 131
+sp 130
+ut 130
+_J 130
+_že 130
+_M 129
+di 129
+kon 128
+la_ 128
+tí 128
+_ro 127
+ns 127
+ek 126
+ick 126
+T 126
+yl 125
+Än 125
+rn 125
+_že_ 124
+op 124
+sti 124
+kte 124
+řed 124
+edn 123
+us 121
+ím_ 120
+" 119
+z_ 119
+str 118
+ti_ 118
+ém 118
+vat 118
+d_ 118
+_ve 118
+áv 118
+ení_ 117
+iv 117
+oj 117
+_kt 117
+ěl 117
+val 116
+eb 116
+_K 115
+tÅ™ 115
+_kte 115
+át 115
+_i 114
+_N 114
+D 114
+kl 114
+ny 114
+byl 113
+ev 113
+ri 113
+ú 113
+ky 111
+sm 111
+oh 111
+E 110
+ma 110
+ět 110
+ský 110
+kter 109
+nu 109
+le_ 108
+ja 107
+zá 107
+tÄ› 106
+tá 106
+zn 106
+let 105
+aj 105
+sto 105
+ší 105
+me_ 105
+-_ 105
+u, 105
+sv 104
+_le 104
+pol 104
+L 104
+_- 104
+ln 104
+pod 104
+H 103
+zi 103
+kéh 103
+kého 103
+_ná 103
+ent 102
+u,_ 102
+_od 102
+né_ 101
+_-_ 101
+al_ 101
+_kter 101
+do_ 100
+nos 100
+cí 100
+rav 99
+ran 99
+_sv 99
+ká 98
+eÄ 98
+ním 98
+bu 97
+_z_ 97
+ích 97
+_byl 97
+ání 96
+rov 96
+kr 96
+dá 95
+ové 95
+dl 95
+uj 95
+nost 95
+ém_ 95
+ech 94
+ly 94
+oc 94
+vé_ 93
+_o_ 93
+dy_ 93
+ak_ 93
+nsk 93
+_so 93
+_ob 92
+nc 92
diff --git a/libtextcat/data/new_fingerprints/lm/danish.lm b/libtextcat/data/new_fingerprints/lm/danish.lm
new file mode 100644
index 000000000000..5e5a61a98638
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/danish.lm
@@ -0,0 +1,400 @@
+_ 21274
+e 9291
+r 5307
+n 4733
+i 3976
+t 3948
+s 3751
+a 3296
+l 3063
+d 3025
+o 2868
+g 2471
+er 2164
+k 2002
+m 1680
+e_ 1655
+en 1613
+f 1507
+de 1484
+r_ 1379
+v 1245
+u 1176
+t_ 1081
+n_ 1032
+er_ 992
+b 942
+. 870
+ge 868
+._ 831
+re 816
+h 816
+et 813
+te 813
+p 806
+in 788
+or 775
+_s 753
+_a 749
+en_ 712
+_e 691
+ti 689
+an 687
+, 681
+,_ 677
+_f 655
+_d 645
+el 642
+ng 635
+nd 634
+g_ 634
+se 615
+le 615
+st 607
+s_ 601
+_o 572
+ne 560
+li 537
+et_ 524
+es 521
+_i 512
+ri 511
+sk 510
+_de 498
+Ã¥ 497
+ar 475
+ed 473
+ig 463
+at 452
+_m 446
+is 443
+fo 441
+æ 441
+ve 438
+_k 434
+ø 432
+der 429
+ke 428
+ing 427
+og 426
+_b 412
+me 408
+il 407
+for 405
+ns 394
+y 389
+_h 380
+_t 374
+on 371
+d_ 370
+al 362
+be 359
+_fo 351
+af 336
+de_ 335
+_og 333
+_p 332
+og_ 325
+om 325
+_for 324
+_og_ 313
+l_ 308
+nge 302
+i_ 295
+_v 294
+c 289
+ter 283
+ll 280
+ni 278
+nde 278
+rs 277
+_af 277
+un 275
+ra 271
+ko 271
+den 270
+_i_ 268
+id 265
+til 265
+j 265
+vi 264
+D 260
+ere 256
+ma 255
+si 253
+f_ 252
+af_ 238
+_af_ 235
+ik 235
+m_ 234
+Ã¥_ 232
+_ti 227
+_D 226
+_u 226
+_er 225
+nt 224
+_en 224
+ls 221
+es_ 216
+lig 216
+ger 216
+re_ 210
+ag 210
+_me 207
+at_ 204
+lle 200
+ge_ 200
+_til 200
+ige 199
+_er_ 199
+der_ 199
+em 199
+ds 197
+r. 195
+io 195
+r._ 195
+ud 193
+_at 192
+_at_ 191
+ta 190
+els 190
+_l 190
+ha 190
+il_ 189
+or_ 189
+ke_ 186
+rt 185
+gen 184
+ka 183
+- 180
+rk 180
+ning 178
+ol 178
+nin 178
+la 177
+ld 175
+De 175
+it 173
+ede 172
+ed_ 171
+_ko 171
+lse 171
+ek 168
+else 167
+inge 167
+på 167
+ng_ 167
+_på 167
+iv 166
+ør 166
+so 165
+he 165
+ens 165
+ske 165
+ind 164
+til_ 163
+rn 163
+ide 162
+ev 162
+den_ 162
+to 162
+sen 160
+_be 160
+sa 160
+bl 158
+_g 158
+an_ 157
+det 156
+om_ 156
+ru 156
+va 155
+_til_ 155
+ste 154
+rd 153
+_på_ 152
+k_ 152
+på_ 152
+di 152
+kr 152
+K 151
+_De 149
+for_ 148
+te_ 148
+kon 148
+ver 147
+mm 146
+am 146
+_en_ 145
+_r 145
+ne_ 144
+ing_ 144
+tr 143
+le_ 142
+del 142
+_in 142
+gt 140
+_st 138
+S 138
+eg 138
+gs 138
+tt 138
+r, 137
+ser 137
+r,_ 137
+er. 137
+ro 137
+er._ 137
+_for_ 136
+ent 136
+kt 136
+eri 135
+ur 134
+lin 134
+B 133
+A 133
+sti 133
+ner 133
+da 133
+ris 132
+ion 132
+_K 131
+ern 131
+ers 130
+ist 130
+ær 130
+ige_ 130
+_si 130
+tte 129
+E 128
+_n 128
+nn 127
+_B 126
+_ha 126
+_. 126
+rne 125
+H 125
+_ud 125
+rin 124
+na 124
+und 124
+ft 124
+_der 124
+ku 123
+_A 122
+ler 120
+and 120
+end 120
+ns_ 120
+rg 119
+op 119
+er,_ 119
+er, 119
+ar_ 118
+P 118
+_S 117
+_H 117
+_._ 116
+ov 116
+erne 115
+tio 115
+med 115
+tion 115
+_E 115
+_P 115
+det_ 114
+pr 114
+e. 113
+ter_ 113
+: 113
+kk 113
+e._ 113
+e,_ 113
+e, 113
+od 113
+kke 113
+ten 113
+ling 113
+:_ 112
+mi 112
+eli 112
+lo 111
+som 111
+_den 111
+rb 110
+se_ 110
+ell 110
+sid 110
+nne 109
+fi 108
+lt 107
+v_ 107
+_de_ 107
+ark 106
+lige 106
+ngen 106
+ie 105
+_med 105
+_der_ 105
+ring 105
+a_ 105
+_vi 104
+-_ 104
+ys 103
+gel 103
+_so 103
+ia 103
+ive 102
+ej 101
+ati 101
+ren 101
+_det 101
+side 101
+ske_ 101
+br 100
+gi 100
+F 100
+M 100
+ul 99
+isk 99
+men 99
+n,_ 99
+age 99
+fr 99
+n, 99
+tu 98
+ts 98
+_ma 98
+nder 98
+ot 97
+dt 97
+R 97
+med_ 96
+ho 96
+ans 95
+_kon 95
+pe 95
+ce 94
+gr 93
+mme 92
+ret 92
+lige_ 92
+mu 91
+_med_ 91
+hv 91
+væ 91
+Det 91
+ens_ 91
+kl 91
+_M 90
+T 90
+ingen 90
+rm 90
+ill 89
+elle 89
+ef 89
+ene 89
+nds 89
+ove 89
+som_ 89
+C 88
+_den_ 88
diff --git a/libtextcat/data/new_fingerprints/lm/drents.lm b/libtextcat/data/new_fingerprints/lm/drents.lm
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/drents.lm
diff --git a/libtextcat/data/new_fingerprints/lm/dutch.lm b/libtextcat/data/new_fingerprints/lm/dutch.lm
new file mode 100644
index 000000000000..17a0626982a5
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/dutch.lm
@@ -0,0 +1,400 @@
+_ 20104
+e 9848
+n 5323
+a 3733
+t 3683
+i 3490
+r 3195
+d 2876
+o 2845
+n_ 2443
+en 2439
+s 2195
+e_ 1842
+l 1837
+g 1522
+en_ 1500
+de 1489
+er 1388
+t_ 1377
+v 1253
+u 1217
+k 1204
+_d 1136
+h 1102
+m 1084
+an 939
+te 875
+j 857
+in 810
+_v 793
+r_ 751
+de_ 742
+ee 737
+p 732
+et 718
+ge 716
+aa 708
+b 703
+_e 686
+st 669
+z 668
+ie 662
+_de 655
+w 631
+c 611
+. 604
+s_ 582
+_de_ 576
+_h 572
+el 570
+ij 564
+._ 554
+et_ 531
+an_ 522
+he 505
+_o 497
+nd 478
+_i 475
+ar 459
+_m 451
+re 442
+ve 441
+' 428
+or 424
+ng 421
+at 418
+_s 415
+oo 403
+_z 401
+le 395
+_b 394
+_a 391
+_he 386
+va 385
+er_ 381
+me 372
+_w 368
+f 361
+on 351
+_t 351
+_va 345
+_g 342
+di 342
+nt 340
+, 335
+g_ 335
+,_ 334
+van 327
+ch 326
+is 326
+ing 325
+be 325
+ni 320
+it 317
+een 316
+_van 315
+al 310
+den 309
+ti 309
+van_ 307
+oe 302
+ke 302
+_van_ 299
+aar 299
+d_ 295
+we 293
+da 292
+tu 290
+_ee 290
+ud 287
+een_ 286
+li 284
+es 282
+_st 281
+ver 281
+ten 281
+ri 275
+nde 275
+der 274
+_in 270
+k_ 268
+vo 267
+het 266
+oor 264
+_het 262
+het_ 262
+_het_ 259
+_een 258
+l_ 258
+ze 257
+_n 254
+ro 248
+gen 243
+_een_ 241
+at_ 240
+op 238
+n. 238
+_en 237
+rs 237
+_da 235
+stu 232
+in_ 230
+_be 229
+_ge 228
+_k 226
+rd 226
+tud 220
+_en_ 220
+n._ 217
+te_ 209
+ei 208
+ent 206
+_me 203
+la 202
+ek 202
+ed 201
+ra 200
+stud 200
+en. 200
+ie_ 197
+ste 196
+_vo 195
+_in_ 193
+_stu 191
+zi 191
+om 189
+ui 189
+en._ 186
+ten_ 185
+_stud 185
+ude 184
+die 183
+ns 183
+_j 181
+D 179
+aan 179
+se 179
+ma 178
+_ve 176
+ne 174
+_p 174
+eg 173
+p_ 172
+ar_ 172
+aar_ 171
+_te 170
+ng_ 169
+_we 169
+'' 167
+_D 165
+ers 164
+_op 163
+dat 161
+dat_ 160
+ig 160
+ere 159
+eer 158
+_zi 158
+voor 156
+voo 156
+nge 155
+nder 151
+nte 151
+or_ 150
+ta 150
+je 149
+ing_ 148
+ll 148
+_ver 147
+jk 146
+oor_ 146
+_dat 145
+ijk 145
+ren 145
+is_ 145
+_dat_ 144
+_l 144
+and 144
+lij 143
+ter 143
+na 142
+uden 139
+tude 138
+_voor 136
+_voo 136
+ond 136
+ken 135
+cht 135
+_al 135
+ht 135
+wa 134
+ho 133
+em 133
+den_ 133
+pe 132
+sc 132
+un 131
+ur 131
+_di 130
+gen_ 130
+zo 129
+rt 129
+ev 128
+mo 128
+lijk 127
+_is 126
+stude 124
+ha 123
+to 122
+el_ 121
+og 121
+op_ 121
+sch 120
+ol 120
+ente 119
+_u 118
+pr 118
+end 118
+mi 117
+iet 116
+_aa 116
+eli 115
+dent 115
+ijn 115
+jn 115
+ou 115
+men 114
+_' 114
+tie 113
+_is_ 113
+nie 113
+tr 112
+ak 112
+id 112
+udent 111
+tuden 111
+uit 110
+_te_ 109
+aan_ 109
+ld 109
+S 108
+_aan 108
+ede 108
+ja 107
+nten 107
+it_ 107
+je_ 107
+ts 107
+erd 106
+est 106
+E 105
+_op_ 105
+ad 104
+al_ 104
+_ze 104
+_on 104
+rk 104
+lle 103
+ens 103
+gel 103
+m_ 103
+len 103
+_r 102
+ec 102
+inge 102
+met 102
+_met 101
+si 100
+die_ 100
+us 100
+onde 99
+_ni 99
+De 99
+eu 99
+dente 99
+enten 99
+ic 99
+_met_ 98
+f_ 98
+met_ 98
+no 97
+ko 96
+voor_ 96
+rde 96
+H 96
+ngen 95
+lo 95
+ot 95
+as 94
+zij 93
+_nie 92
+vi 92
+eb 92
+_De 92
+_zij 91
+ep 91
+wi 91
+_zo 91
+kt 91
+ege 91
+G 91
+bi 90
+j_ 90
+ij_ 90
+ze_ 90
+do 90
+lan 89
+ov 89
+udi 89
+ord 89
+onder 89
+V 88
+elij 88
+_wa 88
+elijk 88
+ef 88
+_die 87
+ag 86
+erk 86
+eren 86
+R 85
+ik 85
+_ma 85
+gr 85
+am 85
+_mo 84
+ul 84
+nn 83
+eve 83
+De_ 83
+maa 83
+ingen 83
+wo 83
+_'' 83
+O 83
+tudi 82
+I 82
+nt_ 82
+tudie 81
+ven 81
+udie 81
+nten_ 81
+_die_ 81
+jaa 80
+ka 80
+eke 80
+ite 80
+a_ 80
+_je 80
+ac 80
+jaar 80
+_je_ 79
+_H 79
+_zijn 79
+zijn 79
+n, 78
+nen 78
+N 78
+n,_ 78
+ijn_ 77
diff --git a/libtextcat/data/new_fingerprints/lm/english.lm b/libtextcat/data/new_fingerprints/lm/english.lm
new file mode 100644
index 000000000000..ab71632c6214
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/english.lm
@@ -0,0 +1,400 @@
+_ 20326
+e 6617
+t 4843
+o 3834
+n 3653
+i 3602
+a 3433
+s 2945
+r 2921
+h 2507
+e_ 2000
+d 1816
+_t 1785
+c 1639
+l 1635
+th 1535
+he 1351
+_th 1333
+u 1309
+f 1253
+m 1175
+p 1151
+_a 1145
+the 1142
+_the 1060
+s_ 978
+er 968
+_o 967
+he_ 928
+d_ 888
+t_ 885
+the_ 844
+_the_ 843
+on 842
+in 817
+y 783
+n_ 773
+b 761
+re 754
+, 734
+,_ 732
+an 732
+g 728
+w 718
+_i 707
+en 676
+f_ 599
+y_ 595
+of 594
+_of 592
+es 589
+ti 587
+v 580
+_of_ 575
+of_ 575
+nd 568
+at 549
+r_ 540
+_w 534
+it 522
+ed 496
+_p 494
+nt 485
+_c 462
+o_ 457
+io 450
+_an 439
+te 432
+or 425
+_b 418
+nd_ 407
+to 406
+st 402
+is 401
+_s 396
+_in 389
+ion 385
+and 385
+de 384
+ve 382
+ha 375
+ar 366
+_m 361
+and_ 360
+_and 360
+_and_ 358
+se 353
+_to 347
+me 346
+to_ 344
+ed_ 339
+. 330
+be 329
+_f 329
+._ 329
+_to_ 320
+co 317
+ic 316
+ns 308
+al 307
+le 304
+ou 304
+ce 293
+ent 279
+l_ 278
+_co 277
+tio 275
+on_ 274
+_d 274
+tion 268
+ri 266
+_e 264
+ng 253
+hi 251
+er_ 249
+ea 246
+as 245
+_be 242
+pe 242
+h_ 234
+_r 232
+ec 227
+ch 223
+ro 222
+ct 220
+_h 219
+pr 217
+in_ 217
+ne 214
+ll 214
+rt 213
+s,_ 210
+s, 210
+li 209
+ra 208
+T 207
+wh 204
+a_ 203
+ac 201
+_wh 199
+_n 196
+ts 196
+di 196
+es_ 195
+si 194
+re_ 193
+at_ 192
+nc 192
+ie 190
+_a_ 188
+_in_ 185
+ing 184
+us 182
+_re 182
+g_ 179
+ng_ 178
+op 178
+con 177
+tha 175
+_l 174
+_tha 174
+ver 173
+ma 173
+ion_ 171
+_con 171
+ci 170
+ons 170
+_it 170
+po 169
+ere 168
+is_ 167
+ta 167
+la 166
+_pr 165
+fo 164
+ho 164
+ir 162
+ss 161
+men 160
+be_ 160
+un 159
+ty 159
+_be_ 158
+ing_ 157
+om 156
+ot 156
+hat 155
+ly 155
+_g 155
+em 153
+_T 151
+rs 150
+mo 148
+ch_ 148
+wi 147
+we 147
+ad 147
+ts_ 145
+res 143
+_wi 143
+I 143
+hat_ 142
+ei 141
+ly_ 141
+ni 140
+os 140
+ca 139
+ur 139
+A 138
+ut 138
+that 138
+_that 137
+ati 137
+_fo 137
+st_ 137
+il 136
+or_ 136
+for 136
+pa 136
+ul 135
+ate 135
+ter 134
+it_ 134
+nt_ 133
+that_ 132
+_ha 129
+al_ 128
+el 128
+as_ 127
+ll_ 127
+_ma 125
+no 124
+ment 124
+an_ 124
+tion_ 122
+su 122
+bl 122
+_de 122
+nce 120
+pl 120
+fe 119
+tr 118
+so 118
+int 115
+ov 114
+e, 114
+e,_ 114
+_u 113
+ent_ 113
+Th 113
+her 113
+j 112
+atio 112
+ation 112
+_Th 111
+le_ 110
+ai 110
+_it_ 110
+_on 110
+_for 109
+ect 109
+k 109
+hic 108
+est 108
+der 107
+tu 107
+na 106
+_by_ 106
+by_ 106
+E 106
+by 106
+_by 106
+ve_ 106
+_di 106
+en_ 104
+vi 104
+m_ 103
+_whi 102
+iv 102
+whi 102
+ns_ 102
+_A 101
+ich 100
+ge 100
+pro 99
+ess 99
+_whic 99
+ers 99
+hich 99
+ce_ 99
+which 99
+whic 99
+all 98
+ove 98
+_is 98
+ich_ 97
+ee 97
+hich_ 97
+n,_ 96
+n, 96
+im 95
+ir_ 94
+hei 94
+ions 94
+sti 94
+se_ 94
+per 93
+The 93
+_pa 93
+heir 93
+id 93
+eir 93
+eir_ 93
+ig 93
+heir_ 93
+_no 93
+ev 93
+era 92
+_int 92
+ted 91
+_The 91
+ies 91
+art 91
+thei 90
+_ar 90
+_thei 90
+their 90
+_pro 90
+et 89
+_pe 88
+_mo 88
+ther 88
+x 87
+gh 87
+S 87
+_is_ 87
+ol 87
+ty_ 87
+_I 86
+nde 86
+am 86
+rn 86
+nte 86
+mp 85
+_su 84
+_we 84
+par 84
+_v 84
+pu 82
+his 82
+ow 82
+mi 82
+go 81
+N 81
+ue 81
+ple 81
+ep 80
+ab 80
+;_ 80
+; 80
+ex 80
+ain 80
+over 80
+_un 79
+q 79
+qu 79
+pp 79
+ith 79
+ry 79
+_as 79
+ber 79
+ub 78
+av 78
+uc 78
+s._ 77
+s. 77
+enc 77
+are 77
+iti 77
+gr 76
+his_ 76
+ua 76
+part 76
+ff 75
+eve 75
+O 75
+rea 74
+ous 74
+ia 74
+The_ 73
+ag 73
+mb 73
+_go 73
+fa 72
+on,_ 72
+ern 72
+t,_ 72
+on, 72
+t, 72
+_me 71
diff --git a/libtextcat/data/new_fingerprints/lm/esperanto.lm b/libtextcat/data/new_fingerprints/lm/esperanto.lm
new file mode 100644
index 000000000000..0eef3ec8894a
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/esperanto.lm
@@ -0,0 +1,400 @@
+_ 57050
+a 16035
+i 12706
+e 12227
+o 12102
+n 10393
+s 8344
+l 7707
+r 7492
+t 7134
+k 5376
+u 4558
+j 3946
+a_ 3875
+m 3783
+d 3710
+p 3693
+la 2840
+s_ 2769
+e_ 2751
+. 2706
+_l 2635
+_k 2619
+v 2531
+n_ 2504
+o_ 2444
+i_ 2333
+._ 2278
+on 2238
+, 2193
+,_ 2182
+_la 2100
+en 2080
+j_ 2050
+as 2028
+la_ 2012
+ta 1956
+_la_ 1907
+an 1882
+_p 1850
+g 1831
+_e 1791
+_d 1778
+is 1737
+aj 1658
+st 1635
+_s 1575
+c 1526
+de 1517
+oj 1498
+er 1476
+ti 1456
+f 1443
+_a 1442
+b 1427
+ro 1379
+_m 1351
+ra 1341
+nt 1293
+ka 1270
+ri 1258
+al 1249
+as_ 1248
+aj_ 1213
+to 1209
+_de 1203
+_t 1200
+te 1179
+_n 1176
+is_ 1171
+in 1151
+ko 1145
+or 1114
+es 1083
+re 1034
+ia 1029
+li 1022
+de_ 1016
+_de_ 979
+ar 974
+_v 966
+vi 942
+lo 932
+x 928
+io 917
+ne 855
+no 848
+ni 843
+mi 835
+ma 819
+_ka 816
+el 815
+pr 771
+z 744
+un 734
+l_ 732
+po 730
+_f 725
+ø 724
+est 691
+na 687
+ki 679
+kaj 676
+si 665
+u_ 663
+kaj_ 660
+" 654
+tas 651
+le 650
+oj_ 648
+_i 643
+tr 642
+_pr 630
+_es 628
+jn 626
+pe 618
+_kaj 616
+ig 616
+_kaj_ 611
+do 608
+sta 606
+on_ 602
+ek 602
+ci 597
+r_ 595
+ý 594
+_r 593
+il 592
+_est 587
+di 586
+am 586
+_mi 582
+aý 578
+_vi 577
+mo 575
+ant 565
+_ne 562
+en_ 561
+o. 559
+æ 543
+iu 538
+o, 529
+ur 527
+o._ 527
+om 525
+o,_ 524
+at 521
+va 521
+- 519
+_en 518
+: 513
+:_ 512
+_ti 500
+M 496
+h 488
+nd 484
+me 484
+_al 481
+_ko 479
+ve 478
+ie 478
+_ki 473
+it 473
+L 466
+_b 465
+se 462
+em 452
+ol 450
+nta 449
+tu 448
+ik 444
+ov 443
+da 443
+_M 440
+_po 439
+tas_ 438
+ne_ 437
+et 437
+_ma 436
+_en_ 435
+su 429
+pl 426
+_L 425
+pa 420
+_o 417
+vo 408
+an_ 407
+ro_ 406
+sti 406
+nu 399
+kon 396
+stas 391
+m_ 391
+ir 388
+n. 386
+fa 386
+jn_ 382
+ku 382
+os 376
+ke 375
+n, 375
+esta 374
+n,_ 372
+_su 362
+ta_ 362
+stas_ 359
+xi 359
+Mi 358
+_ne_ 356
+al_ 355
+nk 353
+so 353
+n._ 352
+id 349
+_g 348
+estas 347
+ga 346
+_h 345
+per 345
+_Mi 340
+ok 339
+K 339
+mp 337
+_esta 337
+s,_ 335
+s, 335
+_se 333
+anta 332
+ul 326
+ran 325
+_" 323
+ý_ 322
+te_ 320
+ak 320
+aý_ 320
+ed 320
+rt 319
+ojn 318
+gi 318
+_æ 317
+tis 316
+gx 316
+mal 316
+ia_ 315
+ks 310
+_al_ 310
+mi_ 309
+S 309
+lu 309
+ns 308
+kt 305
+io_ 302
+ent 300
+? 300
+_K 300
+ec 300
+el_ 299
+_- 299
+li_ 299
+E 298
+þ 298
+_li 297
+fo 296
+ter 296
+_re 296
+A 295
+nto 294
+vi_ 292
+La 292
+_mal 290
+nte 288
+sp 287
+sa 287
+_mi_ 279
+ut 278
+op 278
+_ke 277
+bo 277
+ajn 276
+un_ 276
+T 274
+to_ 272
+-_ 272
+bl 272
+_an 271
+_La 271
+øi 269
+_S 268
+_pl 267
+_fa 266
+ni_ 266
+La_ 265
+_E 264
+N 263
+tis_ 263
+_tr 263
+' 262
+! 262
+_-_ 262
+pro 261
+iu_ 261
+iø 261
+nc 260
+_si 259
+du 257
+_kon 256
+ru 255
+_vi_ 254
+_j 253
+ce 251
+ke_ 249
+ap 248
+us 247
+be 247
+im 247
+B 246
+_ku 246
+_La_ 246
+tra 245
+ad 245
+uj 245
+ac 245
+ita 243
+pre 242
+_pro 242
+co 241
+rm 241
+_ni 238
+_pe 236
+?_ 234
+on. 234
+toj 234
+"_ 234
+j. 234
+_ke_ 233
+s. 232
+_A 231
+av 230
+ri_ 230
+_el 229
+por 229
+` 224
+ev 224
+las 223
+P 223
+j._ 221
+eni 220
+_T 220
+_B 219
+j,_ 218
+j, 218
+era 217
+_in 216
+on._ 216
+cx 216
+_N 215
+ion 215
+ab 215
+.. 214
+) 213
+fi 213
+or_ 212
+pri 212
+s._ 212
+_por 210
+ez 210
+in_ 210
+am_ 209
+on,_ 209
+ll 209
+æi 209
+on, 209
+_ve 208
+ris 208
+esti 208
+!_ 207
+men 206
+vas 205
+iel 204
+taj 203
+_c 201
+aro 201
+ank 200
+_pri 200
+jo 200
+ja 200
+ont 200
+lt 199
+_P 199
+igi 199
+_pa 197
+oj. 197
+( 196
+au 195
+oro 195
+ng 195
+_( 194
+sto 194
+ast 194
+ag 193
diff --git a/libtextcat/data/new_fingerprints/lm/estonian.lm b/libtextcat/data/new_fingerprints/lm/estonian.lm
new file mode 100644
index 000000000000..74a7aa014324
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/estonian.lm
@@ -0,0 +1,400 @@
+_ 20738
+a 7004
+e 5699
+i 5321
+s 4731
+t 3769
+l 3448
+u 3446
+n 2902
+k 2584
+d 2202
+m 2043
+a_ 1758
+o 1684
+r 1429
+g 1174
+v 1151
+e_ 1139
+i_ 1136
+_k 1136
+s_ 1077
+h 1009
+, 995
+,_ 995
+_t 953
+p 953
+j 940
+ä 900
+is 896
+st 851
+se 841
+_s 822
+. 821
+as 801
+d_ 801
+le 800
+ta 794
+in 793
+_m 790
+ô 754
+._ 753
+t_ 746
+ma 710
+_p 680
+si 677
+_v 660
+es 636
+al 626
+us 619
+el 602
+_o 596
+_e 586
+ja 580
+_j 563
+te 562
+ü 549
+li 532
+va 515
+id 501
+ol 498
+tu 497
+da 490
+_n 480
+ku 478
+ud 459
+nu 455
+na 438
+ei 432
+ks 418
+mi 411
+ee 411
+u_ 407
+ka 400
+n_ 394
+b 394
+ga 386
+_l 384
+_a 380
+an 366
+ja_ 365
+et 358
+me 358
+l_ 350
+at 348
+la 341
+ad 340
+st_ 339
+ne 336
+ll 333
+_ta 332
+ra 330
+_ja 328
+ik 323
+en 318
+ni 308
+ul 305
+sa 302
+_ol 302
+nd 299
+_ja_ 299
+nud 296
+ii 291
+ko 286
+_se 285
+le_ 283
+aa 281
+is_ 281
+gi 270
+_te 269
+ag 269
+_va 268
+_ku 267
+ed 262
+em 255
+_mi 255
+ma_ 247
+ti 246
+ri 245
+_h 242
+gu 239
+id_ 238
+ast 237
+it 236
+ga_ 236
+un 232
+de 230
+ud_ 230
+ha 230
+ak 228
+ah 228
+uu 228
+il 227
+ôi 226
+as_ 223
+ke 222
+ar 220
+a, 220
+am 220
+_ko 220
+a,_ 220
+_ka 220
+ai 220
+eg 216
+sin 214
+est 214
+ui 214
+he 214
+ks_ 213
+ö 213
+oo 213
+ju 207
+är 205
+ut 203
+in_ 203
+oli 201
+ki 199
+su 199
+es_ 199
+lt 198
+ist 188
+li_ 186
+ea 186
+vi 184
+im 181
+mu 181
+se_ 180
+ts 180
+on 178
+ise 178
+ta_ 177
+ek 176
+_oli 176
+sel 173
+nud_ 173
+_ü 172
+a. 171
+nn 170
+ema 169
+ng 168
+lu 168
+ge 167
+_si 166
+_ei 165
+_i 165
+_ei_ 164
+ei_ 164
+_r 163
+ole 161
+pa 160
+lle 160
+a._ 160
+ust 159
+du 156
+er 156
+vô 153
+da_ 153
+min 152
+et_ 151
+d,_ 149
+_M 149
+ht 149
+d, 149
+M 149
+kui 148
+_et 147
+K 147
+_K 146
+pe 145
+gi_ 145
+_vô 145
+or 144
+_tu 142
+lt_ 141
+_ma 141
+asi 140
+ve 139
+us_ 138
+ig 136
+sin_ 136
+ur 135
+_ta_ 134
+di 134
+_et_ 134
+s,_ 132
+tas 132
+s, 132
+_kui 131
+sk 131
+re 130
+po 129
+oli_ 129
+om 129
+äi 128
+inu 128
+_na 128
+_oli_ 128
+_sa 128
+aj 128
+mis 127
+ui_ 127
+_me 127
+_pa 126
+tus 125
+pi 125
+te_ 124
+ül 123
+- 123
+est_ 122
+_on 121
+kk 121
+tt 120
+aga 119
+na_ 119
+_T 119
+T 119
+b_ 118
+al_ 118
+sta 118
+_mu 116
+_ju 116
+ida 116
+aks 116
+gu_ 116
+_ni 116
+s. 116
+ad_ 116
+_pe 114
+eks 114
+ev 114
+end 113
+s._ 113
+use 111
+ära 111
+_po 111
+_min 110
+S 110
+aja 110
+_la 110
+ele 109
+el_ 108
+on_ 108
+ab 108
+_S 108
+av 107
+ing 107
+kui_ 106
+_on_ 106
+au 104
+ne_ 104
+ti_ 104
+ell 103
+ae 101
+kô 101
+ed_ 100
+_ke 99
+ata 99
+iis 99
+! 98
+!_ 98
+sid 98
+nda 98
+eh 98
+lle_ 97
+pu 97
+ää 97
+vôi 97
+ine 96
+t, 96
+e,_ 96
+ale 96
+_vôi 96
+t,_ 96
+e, 96
+eda 96
+uk 95
+ast_ 95
+ld 95
+? 94
+_kui_ 94
+_sel 93
+_kô 93
+tul 93
+ega 93
+lg 92
+sii 92
+val 92
+e. 92
+_su 92
+ug 92
+oh 92
+kü 92
+d. 91
+ee_ 91
+see 91
+e._ 91
+öö 91
+oma 91
+_ole 90
+ses 90
+stu 90
+ôt 90
+üü 90
+_om 89
+me_ 89
+ot 89
+d._ 89
+_sii 88
+to 88
+_en 87
+atu 87
+?_ 87
+A 86
+J 86
+pea 86
+jä 85
+_A 85
+_see 85
+ime 84
+_pi 84
+_ha 84
+mô 84
+nä 84
+_J 84
+les 84
+ste 84
+kas 84
+_ä 84
+vä 83
+E 83
+pä 83
+_ve 83
+_E 83
+eis 82
+_jä 81
+_pea 81
+_mô 80
+um 80
+_kü 80
+iku 80
+üd 80
+all 79
+eid 79
+ba 79
+_vä 79
+ina 78
+lj 78
+sid_ 78
+hu 78
+tun 78
+lä 78
+_oma 77
+i,_ 77
+i, 77
+agu 77
+uh 77
+lm 76
+ras 76
+ss 76
+kä 76
+ees 76
diff --git a/libtextcat/data/new_fingerprints/lm/finnish.lm b/libtextcat/data/new_fingerprints/lm/finnish.lm
new file mode 100644
index 000000000000..328f88604279
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/finnish.lm
@@ -0,0 +1,400 @@
+_ 19984
+a 9133
+i 8384
+t 7797
+e 6481
+n 6431
+s 5897
+l 4504
+o 4163
+u 4106
+k 4013
+ä 3354
+n_ 2868
+m 2569
+a_ 1987
+v 1905
+r 1827
+ta 1580
+en 1553
+is 1515
+h 1508
+y 1462
+st 1390
+in 1375
+p 1342
+j 1333
+an 1139
+si 1073
+tt 1030
+te 1008
+en_ 982
+_k 980
+it 974
+ll 947
+aa 942
+ä_ 902
+va 878
+el 855
+_t 851
+ka 846
+i_ 835
+. 832
+se 818
+li 806
+tä 804
+oi 767
+ai 744
+._ 739
+tu 734
+_o 719
+mi 715
+al 703
+on 684
+d 681
+_v 662
+et 654
+_j 641
+t_ 635
+ti 632
+_m 628
+_s 620
+ja 616
+ma 596
+sa 595
+la 582
+ist 575
+_e 565
+to 565
+ks 557
+in_ 554
+es 551
+il 538
+an_ 536
+ki 527
+, 525
+ku 525
+,_ 524
+us 520
+as 514
+nt 512
+ri 495
+ke 494
+at 491
+_p 485
+le 484
+ik 483
+ss 477
+ut 469
+ö 469
+sta 460
+ee 459
+uu 458
+ol 457
+ta_ 451
+ne 445
+ää 445
+ei 443
+uo 436
+ko 433
+un 430
+lu 421
+ii 420
+e_ 418
+nn 413
+_h 412
+ar 408
+er 402
+än 396
+ja_ 386
+im 381
+on_ 365
+_va 363
+aan 354
+_a 352
+me 350
+ak 345
+ssa 331
+na 330
+ie 329
+pa 327
+_ja 326
+ia 325
+tä_ 322
+_l 319
+vi 317
+ise 316
+tta 315
+de 314
+os 312
+lli 309
+_ja_ 304
+jo 295
+vä 290
+su 289
+au 287
+lis 286
+_on 285
+sä 284
+uk 280
+am 280
+ot 280
+ty 275
+ett 271
+ttä 270
+ni 269
+lä 267
+ksi 264
+nk 264
+ht 263
+ul 261
+ell 261
+sa_ 259
+ha 257
+sen 257
+a. 254
+isi 253
+ste 253
+aan_ 252
+_on_ 252
+_ka 252
+sk 251
+kk 246
+itt 245
+ok 242
+a._ 239
+all 239
+yt 239
+mä 237
+mu 237
+av 237
+_y 236
+lla 233
+taa 231
+ais 231
+een 230
+K 230
+lt 228
+s_ 227
+ast 227
+iv 226
+ssa_ 225
+ra 225
+- 223
+kse 223
+oit 220
+om 220
+T 219
+_ku 218
+än_ 216
+aa_ 214
+at_ 214
+tel 211
+ui 210
+si_ 208
+rk 207
+sta_ 207
+_jo 203
+kä 202
+_K 201
+est 200
+em 200
+he 199
+_n 199
+vo 198
+_ta 196
+eh 196
+_ol 196
+S 196
+nta 196
+_ko 194
+je 194
+stä 194
+är 193
+ust 191
+mis 191
+ns 190
+pu 189
+nen 188
+ät 188
+toi 188
+iin 187
+ten 187
+min 186
+ista 185
+hd 184
+a, 184
+a,_ 184
+sen_ 183
+E 182
+lle 181
+vat 179
+ill 177
+no 176
+pä 176
+lm 176
+llis 175
+n. 175
+io 172
+ine 171
+n._ 170
+pi 169
+uks 168
+ava 168
+ään 166
+nen_ 165
+ah 165
+_mu 164
+tus 163
+mm 162
+_to 162
+ek 160
+int 159
+_r 159
+lin 158
+oim 158
+_T 158
+A 158
+imi 157
+tö 157
+la_ 157
+jä 157
+aj 156
+yh 155
+o_ 154
+lo 154
+oli 153
+een_ 153
+le_ 153
+_si 153
+g 152
+aik 151
+vat_ 150
+L 149
+ur 149
+ti_ 149
+sia 148
+ite 147
+inen 147
+ain 146
+sti 146
+lla_ 146
+ys 145
+_mi 145
+val 144
+stu 144
+äm 144
+alli 143
+pe 143
+utt 142
+et_ 141
+_tu 141
+eri 140
+_E 140
+: 140
+nki 139
+ir 139
+llä 138
+up 138
+äi 137
+ama 137
+_ha 135
+id 135
+_se 135
+po 134
+inen_ 134
+tte 133
+nna 133
+ten_ 132
+or 132
+ts 131
+nä 131
+yk 131
+äs 131
+_S 130
+ses 130
+ve 130
+ess 129
+äl 129
+ita 129
+lai 129
+H 129
+van 127
+äk 127
+kin 127
+N 127
+_te 126
+den 126
+tee 126
+P 126
+kaa 126
+iin_ 125
+kun 125
+ois 125
+sit 125
+oh 124
+V 124
+yö 124
+äv 124
+tav 124
+voi 124
+ia_ 123
+I 123
+oll 123
+maa 122
+ih 122
+oj 122
+rj 121
+ro 121
+ikk 120
+so 120
+oo 120
+oimi 120
+do 120
+pp 119
+M 119
+_ei 118
+toim 118
+op 118
+uut 118
+tet 118
+_i 118
+_ma 117
+vai 117
+lä_ 116
+u_ 116
+sy 116
+kau 116
+utta 116
+un_ 115
+eu 115
+ssä 115
+tti 115
+_sa 115
+mp 114
+eis 114
+ka_ 112
+että 112
+taa_ 111
+_et 111
+hu 111
+itu 111
+suu 111
+den_ 111
+ksen 110
+ap 110
+_ke 110
+uv 110
+tam 110
+yv 109
+aup 109
+stä_ 109
+asta 109
+äy 109
+kan 108
+nu 108
+ukse 108
+_toi 107
+ien 107
+hi 107
+iss 107
diff --git a/libtextcat/data/new_fingerprints/lm/french.lm b/libtextcat/data/new_fingerprints/lm/french.lm
new file mode 100644
index 000000000000..5080d9cba9d5
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/french.lm
@@ -0,0 +1,400 @@
+_ 20800
+e 7258
+i 4051
+s 4003
+a 3972
+n 3903
+r 3650
+t 3590
+u 2968
+o 2823
+l 2723
+e_ 2632
+d 2241
+s_ 1721
+_d 1693
+c 1663
+p 1528
+é 1320
+m 1297
+es 1164
+t_ 1106
+_l 1079
+de 1048
+on 959
+_de 940
+en 939
+_p 852
+nt 825
+le 808
+es_ 791
+re 777
+, 721
+,_ 720
+n_ 703
+de_ 685
+' 670
+an 667
+_de_ 645
+v 641
+_s 610
+r_ 596
+_c 594
+er 585
+ai 575
+_a 558
+_e 554
+ou 554
+q 549
+qu 538
+is 530
+te 528
+ti 525
+ur 519
+it 514
+g 498
+a_ 490
+f 480
+la 476
+in 475
+_le 441
+me 436
+nt_ 432
+. 427
+b 427
+ra 423
+io 416
+ent 415
+._ 404
+ne 395
+ns 392
+ion 383
+h 381
+ue 376
+se 371
+le_ 370
+ar 370
+ie 362
+co 361
+at 359
+tr 359
+et 349
+pr 342
+ce 336
+au 328
+u_ 321
+il 314
+_r 313
+_la 304
+un 303
+eu 303
+st 300
+re_ 296
+ro 290
+la_ 288
+on_ 287
+_m 286
+_la_ 283
+que 281
+_qu 280
+_q 280
+po 275
+tio 273
+tion 273
+pa 273
+li 271
+_t 269
+nc 268
+si 266
+_pr 265
+ri 264
+al 263
+ui 262
+_co 259
+i_ 255
+ta 255
+é_ 251
+x 247
+em 244
+l_ 243
+et_ 238
+_l' 236
+l' 236
+les 233
+ns_ 233
+ir 232
+_le_ 228
+ent_ 227
+or 226
+ré 224
+_f 224
+ne_ 222
+à 221
+ve 220
+ch 220
+it_ 219
+di 219
+oi 217
+- 216
+ni 215
+à_ 215
+les_ 215
+d' 214
+el 212
+ss 212
+_n 212
+ut 211
+our 210
+des 210
+" 208
+ur_ 207
+nd 207
+er_ 206
+ait 206
+ion_ 204
+rs 202
+_en 201
+_et 200
+j 200
+_d' 200
+ll 199
+_des 198
+des_ 197
+_pa 197
+té 196
+_et_ 195
+_à 195
+_à_ 195
+om 193
+ma 192
+ati 190
+_des_ 189
+L 188
+so 187
+_u 185
+è 184
+_" 183
+sa 182
+_po 181
+tre 181
+dé 181
+ue_ 180
+pe 179
+en_ 179
+ont 178
+_un 178
+_L 178
+us 176
+_les 176
+_les_ 176
+rt 176
+is_ 173
+_i 173
+du 172
+e,_ 171
+e, 171
+na 171
+s, 170
+s,_ 170
+as 169
+men 169
+M 167
+ait_ 167
+'a 166
+vi 162
+ci 159
+ant 158
+_au 158
+da 157
+_M 157
+ation 155
+atio 155
+con 154
+que_ 153
+ons 153
+eur 151
+est 149
+me_ 149
+mi 149
+par 148
+tion_ 148
+_so 147
+te_ 147
+res 144
+lo 144
+ment 144
+és 144
+ans 143
+_du 142
+du_ 141
+ux 141
+un_ 140
+y 138
+pro 138
+_du_ 136
+_dé 136
+ce_ 135
+_se 134
+_re 134
+pl 133
+A 132
+ge 131
+ic 131
+su 130
+x_ 129
+ien 129
+nce 129
+"_ 129
+ac 128
+il_ 128
+qui 128
+_pro 127
+no 127
+av 126
+_v 125
+_o 125
+rs_ 125
+ans_ 124
+eme 124
+bl 123
+emen 122
+_en_ 122
+iqu 122
+ct 122
+iq 122
+lle 122
+nn 121
+ts 121
+ement 121
+ét 120
+_"_ 120
+ér 119
+té_ 119
+_ce 119
+mp 119
+ire 119
+ui_ 119
+to 118
+he 117
+_é 117
+ca 117
+_j 116
+ec 116
+va 116
+_par 116
+ée 115
+_con 115
+se_ 114
+tre_ 113
+ique 112
+dan 111
+éc 111
+ha 110
+une 110
+P 110
+lu 110
+ux_ 109
+_b 108
+s. 108
+pou 108
+_pou 108
+ier 107
+C 107
+ais 106
+s._ 105
+ain 104
+_un_ 104
+nte 103
+'e 103
+mo 103
+mm 103
+ment_ 102
+une_ 102
+com 101
+_P 101
+'i 101
+_ma 100
+do 99
+ant_ 98
+anc 98
+che 97
+ap 97
+ont_ 97
+_que 97
+os 97
+urs 96
+_di 96
+fi 96
+im 96
+pour 96
+_pour 96
+ê 95
+ts_ 95
+_g 95
+our_ 94
+_sa 94
+ntr 94
+_da 94
+_ré 93
+rai 93
+rm 93
+_qui 93
+e. 92
+am 92
+_com 91
+uv 91
+_C 91
+D 91
+qui_ 90
+e._ 90
+pu 89
+_qui_ 88
+ia 87
+_dan 87
+_dans 87
+dans 87
+ter 87
+fo 87
+son 87
+dans_ 87
+id 86
+ag 86
+ine 86
+tu 85
+ran 85
+au_ 85
+ol 85
+oc 84
+est_ 84
+st_ 84
+enc 84
+F 82
+_tr 81
+'u 81
+tai 81
+ell 80
+R 79
+_su 79
+S 79
+ions 79
+pré 79
+sé 78
+ab 78
+né 77
+_que_ 77
+_in 77
+_av 76
+pour_ 76
+fa 76
+rr 76
+air 75
+_ch 75
+_a_ 75
+ba 74
+_pl 74
+gr 74
+tt 74
+ssi 74
+rd 73
+pas 73
+bi 73
diff --git a/libtextcat/data/new_fingerprints/lm/frisian.lm b/libtextcat/data/new_fingerprints/lm/frisian.lm
new file mode 100644
index 000000000000..9efa35f370c8
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/frisian.lm
@@ -0,0 +1,400 @@
+_ 46446
+e 15767
+n 9616
+i 7837
+a 7830
+t 7562
+r 7297
+s 6307
+o 4784
+n_ 4595
+d 4564
+e_ 4213
+l 4032
+k 3951
+t_ 3079
+en 2863
+m 2605
+er 2492
+y 2462
+f 2371
+_d 2253
+de 2124
+h 1943
+in 1899
+w 1898
+en_ 1857
+_i 1815
+u 1769
+g 1738
+an 1690
+j 1678
+p 1651
+r_ 1619
+_f 1602
+. 1601
+ar 1561
+te 1545
+b 1488
+s_ 1476
+._ 1435
+_s 1370
+de_ 1235
+_de 1224
+_w 1181
+it 1172
+ie 1140
+, 1078
+_o 1056
+,_ 1056
+oa 1045
+_e 1040
+st 1038
+an_ 1010
+_b 972
+_de_ 965
+ke 949
+_m 947
+_t 933
+ne 920
+er_ 895
+at 863
+sk 856
+c 850
+it_ 848
+_h 838
+ei 832
+k_ 817
+yn 790
+ch 784
+le 772
+is 769
+je 765
+el 761
+me 758
+ea 754
+_k 740
+fa 736
+in_ 735
+' 733
+_it 733
+_it_ 727
+ge 727
+re 725
+al 724
+_fa 684
+yn_ 640
+aa 628
+fan 626
+_y 618
+ar_ 617
+et 616
+ri 615
+_fan 613
+_n 612
+li 611
+_yn 610
+_en 598
+fan_ 594
+oe 589
+_fan_ 584
+_en_ 583
+at_ 581
+_in 570
+oar 565
+_in_ 560
+y_ 555
+F 549
+es 540
+_a 536
+ng 523
+be 514
+sj 512
+nt 510
+l_ 510
+_F 506
+ns 506
+D 499
+te_ 499
+der 497
+_yn_ 497
+ti 493
+ek 490
+ro 476
+rs 474
+rd 473
+se 470
+fo 470
+ys 462
+op 461
+û 461
+we 456
+ry 451
+da 445
+d_ 443
+is_ 442
+_D 440
+ur 433
+i_ 432
+ha 426
+_me 425
+ear 422
+_l 417
+ed 410
+as 409
+om 407
+ei_ 406
+Fr 405
+nd 404
+_fo 394
+_p 393
+oc 390
+rys 389
+ol 386
+_Fr 386
+och 383
+fe 381
+ik 379
+ma 379
+ra 377
+nn 374
+_g 368
+_da 367
+di 363
+ts 362
+ta 361
+a_ 360
+ko 359
+et_ 358
+ysk 356
+Fry 354
+Frys 354
+z 354
+ll 350
+_be 349
+ke_ 348
+I 347
+ing 346
+_' 345
+m_ 343
+h_ 340
+ske 339
+_ha 338
+sje 336
+_Fry 336
+_Frys 336
+wi 335
+_op 334
+p_ 334
+_is 333
+ch_ 333
+tr 330
+ten 328
+ers 327
+wa 325
+ter 322
+ji 322
+rysk 319
+_ne 319
+je_ 312
+foa 311
+ê 309
+jo 307
+_is_ 307
+ste 307
+_te 306
+â 301
+n. 301
+nne 300
+rt 300
+foar 299
+S 299
+mei 299
+_r 298
+_oa 297
+wu 293
+ak 291
+si 290
+wur 290
+ni 290
+pe 288
+Frysk 287
+oan 286
+n._ 285
+_mei 284
+der_ 281
+_foar 281
+_foa 281
+ht 278
+cht 277
+- 275
+ú 275
+_j 274
+ne_ 273
+ken 273
+fer 271
+ûn 270
+am 270
+on 268
+nde 264
+_S 264
+B 262
+ki 261
+id 261
+le_ 261
+dat 260
+v 259
+_I 256
+oar_ 255
+op_ 254
+lle 252
+la 244
+tt 243
+dat_ 242
+_fe 242
+mm 240
+inn 239
+_wi 239
+_dat 237
+g_ 236
+mei_ 236
+al_ 235
+_B 234
+e. 234
+ld 231
+_al 230
+_wur 230
+urd 230
+_wu 230
+_op_ 230
+De 229
+inne 228
+ng_ 227
+_mei_ 227
+'t 226
+ks 226
+'t_ 224
+_dat_ 224
+_ko 223
+_st 220
+ân 219
+rk 219
+sa 219
+e._ 219
+tte 218
+en. 218
+n, 217
+ier 216
+ten_ 216
+_fer 215
+mi 215
+hi 215
+ien 214
+_wa 213
+n,_ 213
+no 213
+_te_ 212
+ig 212
+_De 212
+ske_ 212
+_der 212
+W 211
+H 211
+_oan 210
+ee 209
+dy 208
+ek_ 207
+en._ 207
+ic 207
+mme 206
+yk 204
+pr 204
+net 203
+foar_ 203
+he 203
+wurd 201
+_der_ 199
+jen 199
+_dy 199
+kr 198
+ka 197
+im 196
+_H 196
+il 196
+ze 196
+_ma 195
+by 194
+oer 194
+kt 193
+us 193
+M 193
+sk_ 192
+wo 192
+_hi 191
+or 190
+ing_ 190
+_W 190
+ich 189
+De_ 187
+rr 186
+int 185
+_ú 184
+: 184
+ij 184
+_ek 183
+eg 182
+:_ 180
+gen 180
+as_ 180
+_se 179
+e, 178
+_net 177
+e,_ 177
+ins 177
+N 177
+ls 176
+st_ 176
+_wurd 176
+ie_ 175
+E 175
+nne_ 175
+_De_ 175
+sy 175
+wer 174
+gr 174
+f_ 173
+nk 172
+och_ 172
+net_ 169
+ad 169
+_we 169
+rde 168
+sl 168
+bi 168
+of 168
+so 168
+_no 167
+_ta 167
+re_ 167
+to 167
+den 167
+J 167
+t. 166
+It 165
+út 165
+inne_ 165
+ysk_ 165
+lik 164
+sp 163
+_ek_ 162
+ou 162
+tsj 162
+It_ 161
+_sa 160
+A 160
+wol 160
+lâ 159
+_wo 159
+ge_ 159
+lân 159
+ige 158
diff --git a/libtextcat/data/new_fingerprints/lm/georgian.lm b/libtextcat/data/new_fingerprints/lm/georgian.lm
new file mode 100644
index 000000000000..0e88ab08a895
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/georgian.lm
@@ -0,0 +1,400 @@
+_ 14926
+À 7221
+È 5780
+Ä 4137
+Ã 2966
+Ñ 2908
+Ë 2607
+Ê 2372
+Ã 2260
+Ã… 2080
+Ã 2078
+Ì 2005
+Ç 1598
+Ã 1383
+Ó 1363
+È_ 1248
+À_ 1149
+Â 1145
+ÊÈ 1009
+Þ 974
+_Ë 965
+Ñ_ 964
+ÃÀ 912
+ÈÑ 901
+Àà 897
+Ø 820
+. 813
+Äà 767
+" 762
+._ 735
+, 720
+,_ 718
+ÑÀ 684
+_À 669
+Ú 653
+ËÀ 631
+ÀÌ 622
+ÄÊ 575
+ÃÈ 570
+ÅÄ 567
+Õ 551
+_Ã 550
+Ã’ 542
+_Ñ 526
+É 518
+ÀË 517
+ÅÀ 485
+ÅÈ 479
+ÂÀ 478
+_Â 474
+ÓÊ 468
+_È 436
+ÃÀ 435
+ÀÅ 420
+ËÈ 419
+ÌÈ 418
+Äà 416
+ÄÌ 412
+ÈÑ_ 407
+_ÃÀ 404
+ÃÀ_ 393
+ÀÊ 384
+Ëà 382
+Ü 376
+_" 374
+ÊÈ_ 371
+× 369
+ÃÃ 364
+ÀÑ 360
+ÈÇ 358
+ÀÃ 353
+ÌÀ 349
+Ô 349
+ÃÀ 342
+Æ 341
+ËÄ 335
+ÈÀ 334
+ÈÊ 332
+ÃÇ 326
+_Ã 322
+ÃÈ 321
+ØÈ 319
+_Ä 319
+_Ø 319
+ÃÄ 317
+_ÂÀ 316
+ÇÀ 316
+ÄÑ 306
+Ä_ 299
+_Ç 288
+ÃÄ 279
+ÓÊÈ 273
+Êà 271
+Ö 270
+Ì_ 267
+ÌÄ 267
+_ÑÀ 266
+ÃÃ 263
+Óà 260
+Ç_ 256
+ÄÊÈ 255
+ÃÈ 238
+ÊÄ 238
+ÑÈ 234
+ÊÀ 233
+ÃÃ 230
+ÈÌ 229
+_Þ 227
+"_ 225
+ÄÃÈ 224
+: 224
+:_ 223
+È. 221
+_ÃÀ_ 217
+Û 215
+ÞÄ 213
+È, 213
+È,_ 212
+_Ó 211
+Ã_ 209
+_ËÀ 208
+ÈÑÀ 208
+ÃË 206
+_Àà 204
+ÇÅ 203
+ÀÃÇ 203
+ØÄ 203
+È._ 201
+À. 200
+- 193
+ÀÞ 192
+ÅÄÊ 192
+Ú_ 189
+ÈÃ 188
+Ù 188
+ÕÀ 187
+Ã_ 185
+ÈÊÈ 183
+ÄÃ 179
+À._ 177
+Ã_ 177
+ÃÃ 174
+ÞÀ 174
+Èà 173
+ÌÃ 172
+_ÃÃ 172
+ÃÑ 172
+ÄÃÀ 171
+_Ëà 170
+ÃÈ_ 170
+_ËÈ 170
+_Ü 169
+ÇÈ 166
+ÃÄ 166
+_Õ 165
+ØÈ_ 165
+ÄÑ_ 163
+ÀÈ 162
+_ØÄ 160
+ÄÅ 158
+_É 155
+ÀÚ 154
+ÊÈÑ 153
+ÃŽ 151
+Ë_ 149
+ÕÀà 148
+À,_ 147
+À, 147
+ÀÇ 147
+ÀÂ 145
+ÕÀÃÇ 145
+ÈÂ 143
+ËÀà 143
+ÃÊ 143
+ÀÊÈ 142
+ÂÈ 142
+ÌÄà 141
+ÄË 140
+ÀØ 139
+ÓÊÈ_ 139
+ÄÇ 139
+ÇÓ 138
+_Ì 136
+ÈÇ_ 135
+ÀÃÈ 133
+ÀÌ_ 132
+ÊÄà 131
+×à 130
+ÑÄ 130
+ÈË 130
+ÃÄà 128
+ÀÕ 125
+ÆÄ 125
+ÔÄ 125
+Ê_ 124
+ÀÅÈ 124
+ÀÃ_ 124
+ÅÈÑ 123
+_ËÄ 123
+ÀËÈ 122
+_ÕÀ 121
+ÉÈ 121
+_Ã… 120
+×Å 120
+ÃÃ… 116
+_Ã 114
+ÀÌÈ 113
+ÀËà 113
+ÂÀÌ 113
+ÃÃŒ 112
+ÉÀ 112
+ÈÚ 112
+ØÀ 112
+ÀÃÀ 111
+ÃÓ 111
+ÞÅ 109
+ÀÖ 109
+Âà 107
+ÃÀ_ 107
+ÌÈ_ 107
+ÅÀà 107
+ÄÊÈ_ 106
+ÃÃ 106
+ÀÃ_ 106
+_ÀË 105
+ß 104
+ÄÃÇ 104
+_Äà 104
+ÅÄÌ 103
+_ÄÑ 103
+ÃÃË 103
+ÄÌÈ 103
+_Ú 103
+ÃÃ 102
+_Ù 102
+ËÈÑ 102
+ÃÈÑ 102
+Þà 102
+_ÃÃË 101
+_ÕÀà 101
+ÈÅ 100
+_ÕÀÃÇ 100
+ÓÃÈ 99
+ÒÈ 99
+ÂÄ 99
+ÈÒ 99
+ÀÀ 97
+ÀÒ 97
+ÃÓÊ 96
+ÕÅ 94
+ÈÄ 94
+_ÇÀ 94
+Ñ,_ 93
+ÃÑ 93
+ÅÊ 93
+_ØÀ 93
+Ñ, 93
+_ÑÈ 93
+ÀÉ 93
+ÀÆ 92
+ÃÃÀ 92
+ÀÑ_ 92
+Ìà 91
+ÄÃÓ 91
+ÇÕ 91
+ËÓ 90
+ÄÌ_ 90
+ÇÀÅ 89
+ÄÃÓÊ 89
+ÊÑ 89
+ÀØÈ 89
+ÃÄà 89
+Àà 89
+È" 89
+Ñ. 88
+ÚÞ 88
+ÂÀË 88
+ÃÑ_ 87
+_ÄÃÇ 87
+È× 87
+ÃÈÑ 87
+ÌÃÀ 87
+ØÅ 87
+ÞÄÊ 87
+ÃÈÇ 85
+ÑÀ_ 85
+ÇÅÄ 85
+ÓÌ 85
+ÒÀ 85
+_ÃŽ 84
+ÊÈ. 84
+_ÃÀ 83
+Ñ._ 83
+_Ô 83
+_ÂÀË 83
+ÊÈÀ 83
+ÊÈ._ 82
+ÄÃÈ 82
+ÈÀ_ 82
+ÈÀÌ 82
+ÜÈ 81
+ÀÚ_ 81
+"Ë 81
+ÈÓ 80
+_"Ë 80
+ÃÇÅ 80
+_ÄÑ_ 79
+_ÈÂ 79
+ÀÓ 79
+ÈÕ 79
+ÀÃÇÅ 79
+ÇÈ_ 79
+ÑÀÞ 79
+ÃÇÓ 78
+ÊÈ, 78
+ÚÈ 78
+ÞÈ 78
+ÃÇÓÊ 78
+ÇÓÊ 78
+ÊÈ,_ 78
+ÀÃÀ 78
+ÃÑ 78
+_ÀÃ_ 77
+ÞÊ 77
+ÃÓ 77
+ÀÃÇÓÊ 77
+_ÌÀ 77
+ÅÈÇ 77
+ÈÂÈ 77
+ÀÃÇÓ 77
+ÜÀ 76
+ÅÀ_ 75
+_ÞÀ 75
+ÉÅ 75
+ÒÄ 75
+ÃÀ_ 75
+ÅÀÊ 75
+ÇÅÄÊ 74
+ÃÇÅÄ 74
+ÀÃÇÅÄ 74
+ÄÃÀ_ 74
+ÃË 73
+ÑÞ 73
+ÑÒ 73
+ÅÑ 73
+ÑÓ 73
+ÃÇÅÄÊ 73
+ËÞ 73
+ÃÊà 72
+ÃÚ 72
+_× 72
+Âà 71
+ÅÄ_ 71
+Ã’Ã 71
+ÇÄ 71
+ÃÂ 71
+ÅÄà 71
+ÕÀÃÇÓ 71
+ÃÀÃ 70
+ÀË_ 70
+ÅÃ 69
+ËÀ_ 69
+ÃÄ 69
+_ÀÃÀ 67
+à 67
+ÄÃÀ 67
+ÇÀÌ 67
+È×à 67
+ÕÀÃÇÅ 67
+Ò× 67
+ÂÀËà 66
+_ÂÀËà 66
+ÄÒ 66
+ÃÃÀ 66
+ÅÄÊÈ 66
+ÀÌÀ 66
+ÄÃÈÇ 66
+_ÈÂÈ 66
+ÄÊà 65
+ÄÂ 65
+ËÈÊ 65
+ÊË 65
+ÈÊÈ_ 65
+ÃÃÈ 65
+Ã…Ã 65
+_ÈÑ 64
+Ó_ 63
+ËÃ 63
+", 63
+Ãà 63
+ÊÃà 63
+ÂÅ 63
+ÃÂ 63
+Äà 62
+À" 62
+ÀÅÀ 62
+? 62
+",_ 62
+! 62
+_È× 62
diff --git a/libtextcat/data/new_fingerprints/lm/german.lm b/libtextcat/data/new_fingerprints/lm/german.lm
new file mode 100644
index 000000000000..eb4eda0f8239
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/german.lm
@@ -0,0 +1,400 @@
+_ 31586
+e 15008
+n 9058
+i 7299
+r 6830
+t 5662
+s 5348
+a 4618
+h 4176
+d 4011
+er 3415
+en 3412
+u 3341
+l 3266
+n_ 2848
+c 2636
+ch 2460
+g 2407
+o 2376
+e_ 2208
+r_ 2128
+m 2077
+_d 1948
+de 1831
+en_ 1786
+ei 1718
+er_ 1570
+in 1568
+te 1505
+ie 1505
+b 1458
+t_ 1425
+f 1306
+k 1176
+ge 1144
+s_ 1137
+un 1113
+, 1104
+,_ 1099
+w 1099
+z 1060
+nd 1039
+he 1004
+st 989
+_s 952
+_de 949
+. 909
+_e 906
+ne 906
+der 880
+._ 847
+be 841
+es 829
+ic 796
+_a 791
+ie_ 779
+is 769
+ich 763
+an 755
+re 749
+di 732
+ein 730
+se 730
+" 720
+ng 709
+_i 706
+sc 683
+sch 681
+it 673
+der_ 652
+h_ 651
+ch_ 642
+S 630
+le 609
+p 609
+ä 607
+ü 603
+au 603
+v 602
+che 599
+_w 596
+d_ 585
+die 576
+_di 572
+m_ 562
+_die 559
+el 548
+_S 540
+_der 529
+li 527
+_der_ 523
+si 515
+al 514
+ns 507
+on 501
+or 495
+ti 490
+ten 487
+ht 486
+die_ 485
+_die_ 483
+D 479
+rt 478
+nd_ 476
+_u 470
+nt 468
+A 466
+in_ 464
+den 461
+cht 447
+und 443
+me 440
+_z 429
+ung 426
+ll 423
+_un 421
+_ei 419
+_n 415
+hr 412
+ine 412
+_A 408
+_ein 405
+ar 404
+ra 403
+_v 400
+_g 400
+as 395
+zu 392
+et 389
+em 385
+_D 380
+eine 376
+gen 376
+g_ 376
+da 368
+we 366
+K 365
+lt 360
+B 354
+_" 353
+nde 349
+ni 347
+und_ 345
+E 345
+ur 345
+_m 342
+ri 341
+ha 340
+eh 339
+ten_ 338
+es_ 336
+_K 336
+_und 335
+ig 335
+_b 335
+hen 334
+_und_ 332
+_au 329
+_B 327
+_da 325
+_zu 324
+_in 322
+at 321
+us 318
+wi 307
+n, 305
+n,_ 304
+nn 304
+te_ 301
+eit 301
+_h 300
+ter 299
+M 298
+n. 295
+ß 294
+ng_ 289
+sche 289
+- 283
+rs 282
+den_ 282
+_si 280
+G 280
+im 278
+_ge 277
+chen 276
+rd 273
+_E 273
+n._ 270
+icht 270
+rn 268
+uf 267
+isch 264
+isc 264
+nen 263
+_in_ 262
+_M 260
+_er 257
+ich_ 255
+ac 253
+lic 252
+_G 252
+ber 252
+la 251
+vo 251
+eb 250
+ke 249
+F 248
+as_ 248
+hen_ 248
+ach 245
+en, 244
+ung_ 243
+lich 243
+ste 243
+en,_ 243
+_k 241
+ben 241
+_f 241
+en. 241
+_be 239
+it_ 239
+L 238
+_se 237
+mi 236
+ve 236
+na 236
+on_ 236
+P 235
+ss 234
+ist 234
+ö 234
+ht_ 233
+ru 233
+st_ 229
+_F 229
+ts 227
+ab 226
+W 226
+ol 225
+_eine 225
+hi 225
+so 224
+em_ 223
+"_ 223
+ren 222
+en._ 221
+chen_ 221
+R 221
+ta 221
+ere 220
+ische 219
+ers 218
+ert 217
+_P 217
+tr 217
+ed 215
+ze 215
+eg 215
+ens 215
+ür 213
+ah 212
+_vo 212
+ne_ 211
+cht_ 210
+uc 209
+_wi 209
+nge 208
+lle 208
+fe 207
+_L 207
+ver 206
+hl 205
+V 204
+ma 203
+wa 203
+auf 201
+H 198
+_W 195
+T 195
+nte 193
+uch 193
+l_ 192
+sei 192
+nen_ 190
+u_ 189
+_den 189
+_al 189
+_V 188
+t. 188
+lte 187
+ut 186
+ent 184
+sich 183
+sic 183
+il 183
+ier 182
+am 181
+gen_ 180
+sen 179
+fü 178
+um 178
+t._ 177
+f_ 174
+he_ 174
+ner 174
+nst 174
+ls 174
+_sei 173
+ro 173
+ir 173
+ebe 173
+mm 173
+ag 172
+ern 169
+t,_ 169
+t, 169
+eu 169
+ft 168
+icht_ 167
+hre 167
+Be 166
+nz 165
+nder 165
+_T 164
+_den_ 164
+iche 163
+tt 163
+zu_ 162
+and 162
+J 161
+rde 160
+rei 160
+_we 159
+_H 159
+ige 159
+_Be 158
+rte 157
+hei 156
+das 155
+aus 155
+che_ 154
+_das 154
+_zu_ 154
+tz 154
+_ni 153
+das_ 153
+_R 153
+N 153
+des 153
+_ve 153
+_J 152
+I 152
+_das_ 152
+men 151
+_so 151
+_ver 151
+_auf 150
+ine_ 150
+_ha 150
+rg 149
+ind 148
+eben 148
+kt 147
+mit 147
+_an 147
+her 146
+Ge 146
+Sc 145
+_sich 145
+U 145
+Sch 145
+_sic 145
+end 145
+Di 144
+abe 143
+ck 143
+sse 142
+ür_ 142
+ell 142
+ik 141
+o_ 141
+nic 141
+nich 141
+sa 141
+_fü 140
+hn 140
+zi 140
+no 140
+nicht 140
+im_ 139
+von_ 139
+von 139
+_nic 139
+_nich 139
+eine_ 139
+oc 138
+wei 138
+io 138
+schen 138
+gt 138
diff --git a/libtextcat/data/new_fingerprints/lm/greek.lm b/libtextcat/data/new_fingerprints/lm/greek.lm
new file mode 100644
index 000000000000..6dff6cd4f767
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/greek.lm
@@ -0,0 +1,400 @@
+_ 89284
+α 19666
+Ï„ 16086
+ο 15826
+ε 14848
+ι 12766
+ν 12189
+Ï€ 8776
+σ 8653
+Ï 8399
+κ 7761
+μ 7529
+Ï… 6850
+ά 6284
+_Ï„ 5918
+λ 5802
+Ï‚ 5371
+α_ 5272
+η 5236
+έ 4739
+ί 4609
+ό 4489
+το 4461
+Ï‚_ 4111
+ου 4075
+ι_ 4033
+ε_ 4019
+. 3916
+_κ 3897
+ο_ 3836
+._ 3810
+ν_ 3661
+_Ï€ 3414
+_σ 3333
+" 3247
+_μ 3242
+_το 3118
+, 3106
+,_ 3068
+γ 3058
+_α 2912
+ω 2689
+να 2593
+δ 2455
+τα 2420
+χ 2411
+κα 2406
+στ 2406
+_ε 2353
+- 2335
+-_ 2192
+Ï 2171
+αν 2162
+τη 2151
+ή 2141
+Ï…_ 2133
+αι 2100
+θ 2053
+φ 1989
+ου_ 1974
+ει 1889
+_κα 1882
+εί 1864
+το_ 1802
+πο 1771
+αι_ 1727
+σε 1709
+_ν 1647
+_" 1619
+η_ 1609
+ια 1602
+να_ 1518
+τι 1501
+ον 1501
+του 1495
+με 1460
+_του 1407
+_έ 1405
+_στ 1396
+ÏŽ 1364
+πε 1359
+τε 1300
+μα 1298
+β 1274
+Ïι 1252
+ό_ 1241
+_δ 1231
+ξ 1223
+ντ 1220
+_το_ 1196
+απ 1187
+Ïο 1184
+_γ 1183
+_τη 1177
+ζ 1158
+_εί 1150
+ά_ 1150
+_να 1124
+έν 1115
+και 1110
+_και 1093
+αν_ 1082
+Ïα 1078
+και_ 1061
+_να_ 1060
+_και_ 1053
+μπ 1049
+νο 1048
+ατ 1036
+ιο 1024
+ια_ 1018
+ÎµÏ 1003
+Î¿Ï 993
+_απ 984
+σε_ 974
+ικ 973
+_ο 967
+εν 947
+ος 936
+ει_ 935
+πό 901
+λο 892
+_με 890
+νε 884
+του_ 871
+ον_ 869
+ας 865
+_του_ 854
+ασ 841
+με_ 840
+σα 834
+κο 833
+Î±Ï 832
+Ï€Ï 824
+ίν 820
+κά 808
+_πο 804
+πι 796
+Κ 796
+μέ 783
+μο 777
+Î­Ï 772
+αλ 766
+ην 762
+Μ 759
+ισ 745
+κε 742
+τα_ 740
+στο 738
+ω_ 730
+ην_ 728
+Ο 717
+Τ 714
+_φ 711
+ετ 705
+δε 704
+πα 697
+ας_ 688
+τά 684
+ος_ 683
+_ό 680
+_Κ 675
+οι 671
+_χ 670
+την 663
+την_ 658
+_στο 647
+ή_ 645
+πό_ 638
+_θ 633
+_ο_ 631
+", 630
+_Ï€Ï 626
+_Μ 624
+ίπ 624
+άν 623
+",_ 623
+από 620
+που 619
+ότ 618
+λα 617
+τον 617
+_από 616
+μι 612
+Ο_ 611
+Ïε 607
+Ï…Ï„ 604
+λε 595
+_λ 594
+ÏÏŒ 590
+_με_ 586
+ιά 580
+τον_ 577
+γι 577
+_Ο 575
+από_ 572
+Î¬Ï 570
+πά 570
+_από_ 569
+Ïά 562
+ταν 554
+ένα 553
+υν 552
+που_ 551
+δι 547
+Ï„Ï 547
+τό 544
+_β 540
+χε 536
+εν_ 534
+ησ 528
+_Ο_ 525
+ης 520
+". 519
+_που 516
+_Τ 515
+"._ 513
+τη_ 512
+Ï‚. 510
+είπ 508
+ταν_ 504
+_είπ 503
+Ï‚._ 501
+_τα 500
+ξε 497
+στο_ 496
+λλ 493
+ακ 492
+Α 491
+_που_ 490
+άλ 489
+ίπε 489
+είπε 485
+_είπε 483
+_την 483
+τι_ 482
+_την_ 481
+άτ 480
+λι 480
+_δε 479
+άμ 477
+_στο_ 475
+σο 473
+_ά 468
+"_ 463
+μέν 463
+ιν 461
+ις 452
+_αν 452
+κό 451
+αυ 451
+_τον 450
+_γι 449
+θα 447
+ες 446
+ση 446
+_μι 445
+_τον_ 443
+ε. 441
+ε._ 441
+ους 439
+λά 439
+Ï…Ï‚ 439
+ολ 438
+πιο 437
+Î¿Ï 432
+_πα 428
+_κά 427
+τε_ 427
+αμ 423
+; 423
+_μο 422
+σκ 421
+της 421
+_ξ 418
+στη 415
+Κά 411
+νη 408
+για 405
+α. 403
+ÎºÏ 402
+κι 402
+Ï‚, 402
+Ï‚,_ 401
+α._ 398
+ηκ 397
+_Κά 397
+ελ 396
+_έν 395
+_ή 393
+_μα 392
+ία 391
+ως 391
+λη 390
+ίνα 389
+πί 389
+μου 388
+μά 388
+_αυ 387
+ης_ 386
+συ 384
+ναι 384
+Π 383
+αυτ 382
+ί_ 376
+μπι 375
+ίσ 372
+_της 370
+_τα_ 367
+_για 365
+_ένα 362
+_μπ 361
+θε 361
+ιον 359
+ις_ 358
+τή 358
+_θα 354
+_αυτ 354
+άμπ 352
+κ. 352
+κ._ 351
+είν 351
+ομ 350
+ίναι 348
+ντα 348
+ναι_ 348
+ως_ 347
+χα 346
+Ε 346
+Ï…. 346
+για_ 346
+Ï…._ 345
+δεν 345
+ένα_ 345
+α, 344
+α,_ 344
+δεν_ 344
+ÏÏ 343
+όν 343
+α- 342
+Ïσ 341
+_κ. 340
+στε 339
+Κάμ 339
+_κ._ 339
+τέ 339
+α-_ 338
+ευ 338
+ιλ 338
+Ïί 338
+Κάμπ 338
+_μου 338
+_Κάμ 338
+_Κάμπ 337
+υμ 336
+σει 336
+πιον 336
+μπιον 336
+μπιο 336
+_κο 334
+Κάμπι 334
+_η 334
+άμπι 334
+θα_ 333
+νι 332
+της_ 331
+ψ 331
+όμ 330
+ησε 330
+_σα 329
+μα_ 328
+ός 328
+Σ 326
+_δεν 325
+_δεν_ 325
+σμ 324
+ες_ 324
+Ï€ÎµÏ 324
+ίχ 323
+ίναι_ 323
+τικ 322
+_Ï 321
+άμπιο 321
+Ï…Ï‚_ 321
+ους_ 321
+_πε 321
+σω 320
+_για_ 320
+Ï€Ïο 320
+γε 318
+;" 316
+;"_ 316
+Δ 315
+Ï„Ï… 314
+Ïα_ 313
+_συ 312
+Ïω 312
+_θα_ 310
+όσ 309
+ου. 309
diff --git a/libtextcat/data/new_fingerprints/lm/hebrew.lm b/libtextcat/data/new_fingerprints/lm/hebrew.lm
new file mode 100644
index 000000000000..31b4ee0af280
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/hebrew.lm
@@ -0,0 +1,400 @@
+_ 81560
+×™ 23072
+ו 19215
+×” 15606
+ל 12612
+ר 11293
+ת 11070
+מ 10090
+ב 9648
+× 9601
+ש 9081
+×”_ 7811
+×  6685
+×¢ 6326
+_ת 5878
+× 5542
+ד 5019
+×— 4669
+_× 4570
+ב_ 4267
+×› 3984
+_×” 3812
+ק 3769
+פ 3699
+ל_ 3693
+××™ 3535
+מ_ 3489
+ס 3374
+×_ 3362
+תו 3130
+, 3067
+_ל 3037
+_, 3035
+_×™ 2936
+ש_ 2886
+_××™ 2840
+×’ 2768
+ט 2626
+ן 2424
+_ר 2395
+_תו 2291
+. 2256
+צ 2215
+_. 2205
+×™×  2144
+×¢_ 2003
+_ן 1959
+_ו 1913
+" 1815
+יב 1797
+לש 1742
+יל 1687
+יר 1665
+×™_ 1647
+וי 1620
+ו_ 1578
+ור 1525
+×ª× 1475
+×›_ 1469
+רו 1411
+×– 1383
+ונ 1353
+מה 1351
+תי 1343
+×™×™ 1343
+לו 1315
+יד 1285
+רי 1236
+מה_ 1211
+הל 1206
+ומ 1192
+× ×™ 1163
+רש 1155
+×™× 1146
+×ר 1138
+_×ª× 1137
+שי 1134
+יש 1087
+× _ 1080
+×œ× 1074
+וח 1067
+_× 1062
+שמ 1059
+ימ 1052
+×”×™ 1047
+- 1024
+לש_ 1008
+וה 973
+רב 967
+×•× 954
+ת×_ 951
+ול 948
+_ד 941
+×™×” 896
+נו 888
+וע 883
+×™×› 873
+וש 871
+לע 867
+×ל 850
+עו 843
+_- 830
+×—_ 830
+דו 824
+ןו 820
+ר_ 806
+וב 805
+_לש 799
+יט 784
+××” 773
+_לש_ 748
+×™×¢ 746
+די 743
+_ש 726
+_ת×_ 723
+ך 720
+_תי 719
+-_ 716
+_-_ 713
+בי 709
+בו 706
+ות 699
+××”_ 690
+רמ 686
+שה 683
+וד 678
+×™×— 675
+פ_ 672
+×”×  669
+_ב 668
+_×¢ 659
+_ך 655
+יס 652
+ןי 649
+_לע 640
+יו 635
+×מ 635
+יב_ 632
+ת_ 631
+×ž× 628
+שו 627
+_ןו 624
+לי 624
+לע_ 621
+תה 619
+ית 600
+הל_ 599
+וכ 599
+יפ 596
+פה 595
+וק 586
+הש 578
+×¢×™ 575
+_ןי 569
+מו 564
+_לע_ 561
+קי 560
+×™×’ 557
+×™×”_ 557
+רשי 554
+×ו 548
+×ל_ 548
+תוי 548
+ל×ר 546
+×¨× 542
+הר 540
+"_ 540
+מב 539
+שה_ 538
+ופ 538
+×רש 535
+רע 534
+×—×” 533
+וג 532
+×רשי 530
+ל×רשי 530
+ל×רש 530
+×ו 527
+מי 525
+_×™×› 518
+המ 518
+פה_ 511
+×™×›_ 510
+_×™×  509
+לכ 506
+תמ 502
+מב_ 500
+סו 498
+×—× 497
+יק 497
+וו 494
+_ק 485
+×’_ 481
+×יל 477
+_×”×™ 477
+דמ 472
+בה 470
+,×” 470
+_,×” 470
+ק_ 469
+עב 468
+_×ו 467
+הב 467
+×—×”_ 466
+_×™×›_ 463
+×ב 462
+רח 462
+_×ל 461
+×¥ 455
+מל 454
+×™× ×™ 454
+×©× 453
+רה 453
+יצ 452
+×יר 451
+_×”×  447
+טי 443
+ד_ 441
+מע 440
+××™×™ 439
+וה_ 439
+' 435
+×מ_ 430
+.× 429
+תנ 429
+_.× 428
+רק 427
+תר 423
+וס 417
+נש 417
+_הל 414
+סל 413
+נת 408
+ס_ 405
+סה 400
+לפ 400
+בש 399
+,× 399
+_,× 399
+ממ 397
+שי_ 396
+נב 396
+×¢×” 394
+תה_ 393
+תונ 393
+××™×  389
+_רו 385
+×¨×ž× 384
+×™×_ 384
+לב 384
+תב 381
+בר 378
+בה_ 377
+טס 374
+_×¥ 374
+עמ 374
+×—×™ 373
+רפ 373
+הו 371
+חו 370
+בל 370
+_×œ× 370
+קו 367
+_הר 366
+_×יר 364
+חמ 363
+×–_ 362
+_×— 360
+× ×› 360
+_לו 360
+כו 359
+,ת 358
+_,ת 357
+מת 356
+ינו 353
+ורי 353
+ו×_ 349
+רד 348
+תור 348
+××™ 345
+×£ 345
+לשמ 344
+×›×™ 340
+_×יל 340
+וצ 338
+תל 338
+_××™×™ 336
+×¨×—× 335
+_תוי 335
+מ×_ 332
+לח 331
+_תר 329
+_תונ 325
+×’×™ 325
+×¢×”_ 325
+: 324
+פל 324
+×ב_ 324
+שר 322
+רט 321
+תש 320
+צ_ 320
+מע_ 319
+וי_ 319
+_: 319
+צמ 316
+שממ 315
+_ס 315
+תינ 315
+סמ 315
+הד 313
+רה_ 312
+וט 312
+_×£ 310
+ש×ר 309
+רשי_ 305
+×רשי_ 305
+.××™ 305
+_.××™ 304
+הת 303
+יטס 303
+_' 303
+×’×” 302
+שמ_ 302
+_××™×  301
+לה 298
+רג 294
+חט 293
+דע 293
+×יד 292
+×ש 292
+לשממ 292
+××¢ 292
+ידי 290
+של 289
+פו 289
+דב 289
+צו 287
+_××¢ 286
+,××™ 285
+_,××™ 285
+נמ 284
+סי 282
+שב 282
+_רש 281
+דר 281
+_תור 281
+קה 280
+תוש 278
+הש_ 276
+מל_ 276
+_×ל_ 275
+והי 274
+_ל×ר 274
+פי 274
+עב_ 271
+ל×_ 271
+×‘× 270
+×—×_ 269
+חל 268
+עפ 267
+בע 267
+.×” 266
+_רי 266
+_ל×רש 266
+וני 266
+_.×” 265
+יטסל 265
+טסל 265
+×™×œ× 265
+תע 264
+× ×” 263
+קל 262
+ניט 260
+_.ת 260
+.ת 260
+פל_ 260
+הו_ 259
+סלפ 258
+טסלפ 258
+יטסלפ 258
+ניטסל 257
+ניטס 257
+ומ_ 256
+סה_ 256
+מש 255
+ירו 255
+נש_ 254
+ומת 254
+×”×” 252
+בק 251
+יש_ 251
+_ונ 251
+ירב 251
+_רב 249
+_יד 249
+_×יד 249
+×›×” 248
+×’×  247
+_דו 247
diff --git a/libtextcat/data/new_fingerprints/lm/hindi.lm b/libtextcat/data/new_fingerprints/lm/hindi.lm
new file mode 100644
index 000000000000..3b4e1584a943
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/hindi.lm
@@ -0,0 +1,400 @@
+_ 75620
+æ 19109
+U 16333
+ð 11131
+¤ 11107
+· 10241
+·¤ 7855
+Ú 6993
+ÚU 6598
+ç 6322
+è 6151
+Ù 5887
+_· 5800
+ã 5370
+â 5168
+U_ 5118
+æ_ 4935
+Ì 4508
+× 4467
+Ø 4292
+ð_ 3962
+_·¤ 3937
+è_ 3904
+Â 3718
+¤_ 3609
+Ü 3563
+ãU 3514
+_ç 3394
+´ 3363
+ß 2962
+Â¥ 2907
+ÚU_ 2851
+_Â¥ 2668
+_ã 2632
+_â 2575
+ô 2527
+ÃŽ 2513
+´_ 2512
+Ã 2451
+Uæ 2336
+Õ 2314
+_Â 2280
+_× 2260
+ñ 2187
+¢ 2082
+» 2078
+¤æ 1988
+ð´ 1913
+·¤æ 1886
+·ð 1881
+ð¤ 1877
+·ð¤ 1860
+Ã 1842
+Ã_ 1830
+·¤_ 1746
+é 1714
+æð 1703
+ð¤_ 1695
+·ð¤_ 1694
+æÚ 1631
+ü 1610
+_·ð 1579
+_·ð¤ 1567
+ð´_ 1556
+æÚU 1536
+Øæ 1528
+Ùð 1525
+Uè 1515
+_·ð¤_ 1488
+Ù_ 1411
+Ùð_ 1407
+à 1390
+_ãñ 1357
+ãñ 1357
+Ã… 1337
+Ì_ 1319
+_Õ 1315
+×ð 1300
+ç· 1279
+Ö 1270
+_Ã 1258
+_·¤æ 1209
+ç·¤ 1203
+¤è 1195
+_Ù 1187
+° 1181
+§ 1166
+×ð´ 1163
+àæ 1162
+ý 1157
+¿ 1155
+_×ð 1150
+×ð´_ 1140
+·¤è 1127
+¤è_ 1092
+_×ð´ 1086
+_×ð´_ 1077
+ñU 1075
+_ãU 1071
+·¤è_ 1065
+ãñU 1062
+_ãñU 1062
+æÙ 1053
+¥æ 1052
+Ìæ 1038
+¤Ú 1035
+_ç· 1009
+¤ÚU 972
+âð 970
+_ç·¤ 967
+_·¤è 959
+ÚUæ 936
+ãUæ 930
+_·¤è_ 929
+·¤Ú 928
+Ã 926
+_¥æ 904
+Ã…U 904
+Ç 904
+© 890
+ê 887
+©U 885
+·¤ÚU 866
+_© 865
+_©U 862
+_Ö 854
+âð_ 850
+_Ú 847
+_Ì 837
+S 834
+UÃ 831
+_ÃŽ 830
+UÃ_ 823
+_ß 798
+Ùæ 788
+â_ 783
+_ÚU 773
+Uè_ 770
+ô´ 753
+æØ 752
+Ã 749
+Øæ_ 732
+×æ 732
+ô_ 704
+_§ 704
+Ãæ 704
+_âð 697
+Âý 695
+ãUè 693
+¤æ_ 690
+ü_ 688
+æÚU_ 685
+, 684
+_» 682
+·¤æ_ 679
+,_ 674
+_·¤Ú 670
+¹ 665
+ðU 664
+ßæ 648
+_Âý 647
+UÌ 644
+Ü_ 643
+_Ü 641
+ç·¤_ 639
+Ø_ 639
+Üæ 633
+_âð_ 633
+æç 623
+Uô 621
+ô´_ 615
+_·¤ÚU 611
+Uæ_ 599
+ãU_ 595
+Üð 594
+UÙ 589
+ñUà 580
+_ãñUà 580
+ãñUà 580
+_° 576
+_Ãæ 573
+ñUÃ_ 572
+ãñUÃ_ 572
+æÜ 569
+_Ø 569
+_Ùð 569
+ÂÚ 561
+_ç·¤_ 557
+‡ 556
+¤ô 552
+ææ 550
+ÂÚU 549
+çß 544
+Õæ 538
+_·¤æ_ 535
+×_ 532
+çÜ 525
+âæ 523
+·¤ô 519
+æð_ 502
+æ¢ 501
+¸ 498
+_Ùð_ 495
+‹ 494
+_ÂÚ 493
+Âæ 493
+Ìæ_ 490
+_ÂÚU 485
+çÙ 484
+õ 481
+È 478
+ྠ469
+. 459
+ÂÚU_ 458
+Öæ 449
+Øð 449
+_çß 445
+§â 444
+¤ÚU_ 443
+Öè 442
+_§â 440
+_ÂÚU_ 439
+æð´ 437
+Ùæ_ 435
+€ 434
+_¿ 433
+ÚUè 431
+⢠431
+_·¤ô 430
+Îð 427
+æÌ 425
+ÃŽ_ 420
+Öè_ 419
+¸U 415
+˜ 412
+˜æ 412
+§ü 410
+´U 405
+ÇU 399
+Ⱦ 399
+·¤ÚU_ 394
+Uã 394
+æÙ_ 393
+çÌ 393
+¤ô_ 392
+·¤ô_ 392
+çÎ 389
+ÚUÌ 385
+æà 385
+Ǹ 383
+æð´_ 382
+Ìè 381
+Ãæ 379
+çÚ 375
+°_ 374
+ãUô 374
+ÚUã 374
+æà 373
+æÃ_ 372
+_·¤ô_ 371
+_Õæ 369
+æè 364
+çÚU 364
+ðU_ 362
+¤æð 358
+Ȥ 357
+Uæð 354
+è´ 353
+â· 352
+ß_ 350
+U· 349
+¤æÚ 346
+Ìð 346
+·¤æð 346
+æü 345
+õÚ 344
+õÚU 342
+·¤æÚ 342
+_×æ 341
+_Öè 341
+_çÜ 340
+ñU_ 337
+_ãñU_ 337
+ÿ 337
+ãñU_ 337
+Ùè 336
+ãUè_ 334
+¿æ 334
+ñ´ 334
+_Öè_ 332
+æ× 327
+¤æÚU 327
+ÿæ 326
+_Ã 325
+U·¤ 323
+·¤æÚU 323
+Uè´ 322
+ãUè´ 321
+_âæ 320
+ǸU 319
+_¥õ 319
+¥õ 319
+õÚU_ 319
+_ÚUã 318
+Úð 317
+è´_ 316
+_⢠316
+æ· 313
+Øô 310
+_ãUæ 309
+Øã 309
+À 308
+ØãU 308
+_Øã 308
+_ØãU 307
+_·¤æð 304
+_¥õÚ 304
+¥õÚ 304
+_¥õÚU 304
+_Öæ 304
+¥õÚU 304
+¥õÚU_ 303
+_çÙ 303
+ãUè´_ 300
+Uè´_ 300
+_S 300
+Îæ 300
+UÜ 298
+ÚUè_ 296
+æÎ 296
+æß 294
+Ã…U_ 294
+Øð_ 293
+Ùã 292
+âè 291
+_Ùã 290
+æè_ 290
+ðà 290
+Üð_ 289
+UãU 288
+ÙãU 288
+Uâ 288
+_ÙãU 286
+_à 286
+_ÙãUè 285
+_Âæ 285
+ÙãUè 285
+æ·¤ 284
+_àæ 284
+ÙãUè´ 283
+UÌ_ 282
+ãñ´ 280
+_ãñ´ 280
+ñ´U 280
+â× 279
+_çÎ 278
+_ãñ´U 278
+ãñ´U 278
+»_ 277
+_Îð 275
+ðàæ 274
+àæ_ 273
+æñ 272
+·¤ã 272
+¤ã 272
+Ìð_ 272
+_ãUô 272
+‡æ_ 272
+- 271
+¤ãU 271
+·¤ãU 271
+¢_ 271
+_·¤ã 270
+ãUæ_ 270
+_·¤ãU 269
+ÚUãU 268
+ãé 267
+æâ 265
+°· 263
+¤Ø 263
+¤æð_ 262
+·¤æð_ 262
+°·¤ 262
+ÚðU 258
+_°· 258
+Ùè_ 258
+_°·¤ 257
+ÀU 256
+v 253
+ÂÙ 252
+_ÚUæ 252
+Üè 249
+ç× 247
+çâ 246
+_Ã… 246
+ÚUÙ 246
+×é 245
+._ 245
+UÚ 244
+éU 243
diff --git a/libtextcat/data/new_fingerprints/lm/hungarian.lm b/libtextcat/data/new_fingerprints/lm/hungarian.lm
new file mode 100644
index 000000000000..307348b47789
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/hungarian.lm
@@ -0,0 +1,400 @@
+_ 19186
+e 5753
+a 4627
+t 4522
+s 3480
+l 3437
+n 3137
+k 3036
+i 2527
+r 2437
+z 2399
+o 2303
+á 2039
+é 1995
+g 1978
+m 1695
+y 1338
+_a 1256
+b 1186
+d 1148
+a_ 1108
+v 1057
+t_ 901
+sz 889
+el 832
+, 819
+,_ 818
+h 792
+k_ 769
+. 767
+et 743
+gy 711
+s_ 705
+_m 702
+_a_ 695
+en 671
+ö 662
+n_ 646
+_k 645
+j 623
+._ 613
+i_ 606
+eg 601
+p 586
+_e 580
+u 579
+le 576
+ó 542
+er 495
+f 485
+ek 477
+te 477
+és 473
+_s 471
+al 464
+ta 458
+í 453
+_h 444
+_t 442
+an 426
+ze 425
+me 406
+at 405
+l_ 401
+es 395
+õ 387
+y_ 381
+z_ 375
+tt 374
+ke 372
+_v 369
+ás 368
+ak 367
+_é 365
+ny 363
+tá 359
+c 358
+re 350
+to 347
+A 343
+e_ 340
+ü 332
+ne 330
+os 326
+ál 320
+_f 320
+az 317
+zt 317
+ár 317
+_n 315
+ko 312
+_A 303
+_sz 302
+is 301
+ve 299
+gy_ 297
+ít 293
+_b 293
+ra 291
+or 289
+ol 284
+_i 281
+em 279
+_l 274
+la 264
+ez 262
+be 260
+lt 260
+ok 260
+ye 256
+_me 252
+on 251
+en_ 247
+ar 245
+_az 245
+in 243
+án 242
+se 242
+ég 238
+egy 237
+ha 237
+r_ 237
+té 237
+ér 235
+sze 233
+én 226
+ly 224
+g_ 221
+" 221
+ll 219
+iz 214
+de 214
+ek_ 213
+mi 212
+rt 211
+ba 209
+ho 209
+A_ 209
+és_ 209
+az_ 205
+va 204
+ag 203
+ka 202
+na 202
+og 201
+ik 201
+nt 200
+_A_ 199
+ô 199
+- 198
+tt_ 198
+_az_ 197
+ni 195
+cs 194
+ki 190
+kö 187
+át 187
+_eg 187
+nd 186
+fe 185
+_és 185
+lá 182
+bi 181
+nk 180
+_le 179
+an_ 179
+_és_ 177
+tás 175
+ké 174
+meg 173
+_egy 172
+ma 171
+as 170
+sa 170
+sí 169
+ge 167
+ot 167
+za 167
+E 166
+m_ 165
+_meg 165
+_el 165
+bb 164
+ro 164
+zá 163
+he 162
+má 161
+sé 160
+_r 160
+sít 160
+tos 159
+ti 159
+st 158
+_j 158
+él 157
+it 156
+_ho 156
+ül 156
+_ha 155
+vé 154
+am 152
+oz 152
+ele 151
+ya 151
+zto 150
+ú 149
+biz 147
+so 147
+et_ 145
+izto 144
+izt 144
+ap 141
+"_ 141
+ed 141
+ss 140
+bizt 140
+ék 140
+bizto 140
+iztos 139
+ét 139
+ztos 139
+osít 138
+zet 138
+osí 138
+mé 137
+_is 137
+t, 136
+tosít 136
+tosí 136
+t,_ 136
+ó_ 135
+agy 135
+ztosí 135
+li 134
+om 134
+_fe 134
+ere 133
+ág 133
+t. 132
+nek 131
+vi 129
+_d 129
+zo 128
+k,_ 128
+k, 128
+_kö 127
+_p 127
+M 126
+let 126
+ak_ 125
+já 125
+ett 125
+û 124
+si 124
+ész 123
+_E 123
+ép 123
+vá 123
+rá 123
+t._ 123
+is_ 123
+S 123
+ítás 122
+ítá 122
+kor 121
+ai 121
+fel 120
+da 120
+_mi 120
+pe 119
+ogy 118
+ban 118
+ad 117
+ga 116
+_va 116
+ott 114
+_ne 114
+_ki 113
+ör 113
+zé 112
+ben 112
+_te 111
+zi 111
+sá 110
+ség 109
+do 109
+tó 108
+em_ 108
+_" 108
+_ta 108
+_M 107
+ogy_ 107
+_á 107
+k. 106
+ól 105
+_ke 105
+_g 104
+: 103
+gye 102
+ák 102
+hog 102
+ri 102
+mo 101
+ok_ 101
+:_ 101
+hogy 101
+il 101
+el_ 100
+zer 100
+ete 99
+nn 99
+nak 98
+je 98
+sítá 98
+szá 98
+yo 98
+osítá 98
+sítás 98
+_ké 98
+_hog 98
+lé 97
+_S 97
+_hogy 97
+ig 97
+_- 96
+hogy_ 96
+ban_ 96
+ese 95
+_bi 94
+fo 94
+ja 94
+ul 94
+õ_ 94
+k._ 94
+_c 93
+ká 91
+es_ 91
+ná 91
+ény 91
+gé 91
+ás_ 91
+egy_ 90
+áb 90
+rd 89
+I 89
+ány 89
+_biz 89
+_fel 88
+öv 88
+ala 88
+szer 88
+po 88
+_ma 88
+leg 88
+tö 88
+ket 87
+un 87
+di 87
+ai_ 87
+nek_ 87
+rm 86
+tal 86
+év 85
+_is_ 85
+nem 85
+ti_ 84
+öz 84
+szt 84
+ut 83
+ter 83
+dé 83
+kk 83
+or_ 83
+b_ 82
+né 82
+os_ 82
+re_ 82
+rs 82
+_bizt 82
+min 82
+ben_ 81
+ra_ 81
+ik_ 81
+go 80
+len 80
+lm 80
+öt 80
+ely 80
+aj 80
+öl 80
+_sze 80
+_be 79
+ev 79
+ré 79
+ssz 79
+nt_ 79
+gya 79
+K 79
+si_ 79
+sza 78
diff --git a/libtextcat/data/new_fingerprints/lm/icelandic.lm b/libtextcat/data/new_fingerprints/lm/icelandic.lm
new file mode 100644
index 000000000000..b1fe0f2c27dd
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/icelandic.lm
@@ -0,0 +1,400 @@
+_ 26104
+a 6496
+r 6044
+n 5160
+i 5123
+s 3987
+e 3891
+u 3582
+t 3300
+ð 3126
+l 3071
+g 2726
+m 2459
+k 2256
+f 2230
+r_ 1967
+v 1641
+ar 1472
+ð_ 1420
+_s 1332
+á 1248
+o 1221
+í 1197
+a_ 1155
+in 1150
+i_ 1114
+h 1023
+j 975
+d 974
+st 929
+að 925
+. 922
+n_ 875
+._ 875
+_v 874
+m_ 854
+nn 842
+_f 840
+ur 824
+_a 806
+ó 785
+_h 782
+æ 779
+ið 771
+er 765
+um 727
+g_ 716
+y 711
+_e 709
+þ 672
+b 672
+ir 671
+ri 670
+an 667
+ö 650
+_á 648
+að_ 645
+u_ 633
+na 631
+í_ 625
+ar_ 622
+_þ 608
+á_ 606
+_í 595
+ta 567
+ei 566
+la 558
+_m 549
+_í_ 549
+um_ 547
+t_ 535
+ti 529
+_o 524
+ur_ 523
+_að 519
+ið_ 512
+ði 496
+ve 494
+og 494
+ú 482
+og_ 479
+_og 478
+_og_ 478
+nd 469
+p 464
+ra 455
+un 454
+ir_ 452
+_að_ 451
+ni 439
+en 439
+ðu 439
+_á_ 436
+ng 434
+il 404
+ga 395
+_t 395
+nu 393
+ki 392
+ja 383
+inn 379
+_b 375
+sk 352
+s_ 350
+vi 349
+rð 347
+ða 345
+ef 339
+ag 336
+_u 330
+se 324
+lu 324
+af 321
+_ve 320
+tu 318
+em 307
+eg 304
+nn_ 303
+_l 303
+va 301
+_k 296
+, 295
+ns 292
+re 292
+tt 291
+,_ 291
+l_ 286
+am 286
+es 285
+yr 285
+al 281
+da 277
+S 275
+gu 273
+_se 271
+ver 268
+_g 266
+ing 266
+_n 262
+is 258
+_er 257
+sa 256
+ður 255
+le 255
+_st 255
+_S 254
+sem 254
+ll 254
+me 253
+ha 251
+li 249
+kk 249
+rs 247
+_vi 247
+rn 246
+sl 244
+gi 243
+ss 242
+rf 241
+fy 240
+ði_ 240
+mi 238
+ka 237
+ma 231
+ld 230
+é 229
+rir 227
+sta 227
+fyr 227
+ví 227
+di 226
+ru 224
+var 224
+_fy 222
+ku 221
+em_ 221
+nar 220
+_sem_ 220
+_sem 220
+sem_ 220
+he 219
+yri 217
+_fyr 216
+si 216
+yrir 215
+au 212
+er_ 212
+ek 211
+_ha 210
+þe 209
+fyri 207
+fyrir 207
+_þe 205
+fi 204
+fr 203
+ge 201
+or 200
+ne 200
+ann 198
+jó 198
+_va 196
+_fyri 196
+_ver 194
+fl 192
+_er_ 191
+_um 189
+ík 188
+til 187
+_he 186
+fa 186
+il_ 182
+_ti 178
+_til 177
+gar 176
+_var 176
+na_ 176
+ý 175
+eð 171
+fu 170
+nni 169
+_me 168
+ki_ 167
+við 166
+ey 165
+fn 165
+arf 164
+til_ 163
+st_ 162
+_til_ 162
+þa 161
+num 161
+_þa 161
+as 160
+_við 160
+rt 159
+el 158
+uð 156
+inn_ 155
+_um_ 154
+ra_ 153
+bæ 153
+tar 151
+ta_ 151
+erð 151
+ór 148
+and 148
+_sa 146
+ig 146
+_en 146
+nga 145
+rir_ 145
+us 144
+jar 143
+et 143
+ár 142
+_sk 140
+ndi 140
+æð 139
+var_ 139
+_r 138
+av 138
+æk 137
+nna 137
+ður_ 136
+ál 136
+ko 135
+nin 135
+við_ 135
+ól 135
+ins 134
+ik 133
+E 133
+K 133
+yrir_ 133
+ns_ 133
+on 133
+ein 132
+_við_ 132
+ög 132
+já 132
+Þ 132
+öl 132
+ðs 132
+_mi 131
+f_ 131
+sí 131
+sj 131
+stu 131
+nda 130
+_var_ 130
+gr 129
+ús 128
+tæ 127
+ri_ 126
+haf 126
+_sí 125
+vík 124
+rin 124
+te 124
+r. 124
+r._ 123
+H 123
+nes 123
+ót 123
+ru_ 123
+kr 122
+F 122
+ær 121
+num_ 121
+k_ 121
+a. 121
+_H 121
+_fr 120
+_ge 120
+rá 120
+_E 120
+_Þ 120
+ug 120
+ngu 119
+an_ 119
+inga 118
+_K 118
+_haf 118
+enn 117
+ars 117
+rið 117
+en_ 117
+sin 116
+kur 116
+it 116
+ða_ 116
+ti_ 115
+rði 114
+tj 114
+ni_ 114
+at 114
+tarf 114
+br 113
+slu 113
+kki 113
+rg 113
+má 113
+kv 113
+_en_ 113
+a._ 112
+gar_ 112
+du 112
+ju 110
+eið 110
+und 110
+lag 110
+tur 110
+ega 109
+hú 109
+íð 109
+gn 109
+hef 109
+kj 109
+_hef 109
+_sta 108
+B 108
+V 108
+sam 107
+_ei 106
+_B 106
+ft 106
+ga_ 106
+G 106
+_G 105
+lö 105
+kki_ 105
+star 104
+in_ 104
+R 104
+með 104
+_ár 103
+_með 103
+ekk 103
+inu 103
+tö 103
+_V 103
+m. 103
+aði 103
+jö 102
+æj 102
+_bæ 102
+ess 102
+hús 101
+ut 101
+gs 101
+aví 101
+mu 101
+_R 101
+_ú 101
+rst 100
+æjar 100
+leg 100
+æja 100
+ja_ 99
+avík 99
diff --git a/libtextcat/data/new_fingerprints/lm/indonesian.lm b/libtextcat/data/new_fingerprints/lm/indonesian.lm
new file mode 100644
index 000000000000..3fa5a09b4691
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/indonesian.lm
@@ -0,0 +1,400 @@
+_ 19406
+a 10666
+n 5455
+e 4535
+i 4387
+r 2936
+t 2902
+an 2853
+u 2841
+k 2761
+s 2311
+m 2178
+d 2134
+g 2105
+l 1780
+a_ 1506
+n_ 1476
+ng 1449
+p 1397
+b 1275
+an_ 1270
+o 1246
+h 1130
+i_ 1108
+er 1038
+ka 1032
+_d 1006
+y 997
+, 951
+en 941
+ar 914
+,_ 900
+_m 880
+ya 842
+ta 838
+ang 797
+di 787
+da 773
+. 754
+la 742
+._ 738
+me 732
+ak 728
+_s 718
+at 690
+ra 688
+ga 683
+_k 671
+_me 650
+in 628
+ah 601
+_t 583
+_p 570
+g_ 551
+_b 551
+_di 548
+ng_ 544
+ma 536
+se 526
+tu 511
+na 506
+al 500
+ri 490
+as 483
+k_ 482
+j 480
+si 470
+ny 467
+h_ 457
+sa 452
+ang_ 439
+it 424
+kan 423
+ti 418
+_se 417
+pe 412
+S 409
+ba 407
+ke 407
+em 405
+men 405
+be 403
+un 401
+te 401
+am 396
+pa 395
+nya 390
+_men 374
+el 374
+t_ 371
+_a 360
+_i 356
+u_ 355
+kan_ 345
+_ke 339
+is 335
+ah_ 324
+_S 324
+eng 321
+nga 320
+ia 318
+_pe 316
+ha 313
+ap 311
+r_ 308
+w 305
+li 301
+_da 300
+s_ 299
+P 296
+nd 290
+_be 287
+ik 283
+ja 281
+yan 281
+ad 275
+ek 273
+uk 272
+di_ 270
+bu 269
+ya_ 268
+yang 268
+ak_ 266
+ber 265
+_y 265
+_ya 264
+_P 263
+ru 260
+K 259
+yang_ 256
+_yang 256
+_yan 256
+nt 255
+de 253
+_te 250
+wa 249
+et 247
+at_ 246
+ara 245
+gan 243
+A 237
+ari 235
+ala 230
+itu 229
+c 225
+ol 225
+ni 225
+us 225
+dan 224
+_K 224
+M 224
+B 223
+ata 222
+ai 221
+ur 219
+nya_ 217
+ua 215
+_ka 214
+_ber 210
+eb 209
+ran 206
+D 206
+ela 206
+_di_ 205
+_l 204
+ngan 204
+ter 203
+re 201
+- 199
+aka 198
+l_ 194
+_A 191
+era 191
+a, 191
+e_ 190
+ir 187
+I 186
+tan 185
+_B 184
+ut 184
+ku 183
+a,_ 183
+il 182
+J 181
+um 180
+_it 180
+_itu 180
+_ta 179
+su 179
+dan_ 177
+es 177
+on 177
+or 177
+_dan 176
+lu 174
+_M 172
+tu_ 172
+_dan_ 172
+enga 171
+mb 169
+R 169
+si_ 168
+per 168
+gan_ 168
+ngan_ 165
+" 162
+_ter 162
+a. 161
+man 161
+gk 160
+a._ 160
+asi 160
+ngk 160
+ep 160
+ag 159
+ul 158
+da_ 157
+m_ 155
+du 155
+ada 153
+ki 153
+rt 150
+mp 150
+T 150
+ama 148
+ing 148
+na_ 147
+_J 147
+_D 145
+ung 145
+ana 145
+n, 144
+ju 144
+ud 144
+rin 143
+gi 143
+aw 141
+lah 138
+lan 138
+_sa 136
+ri_ 136
+meng 136
+_meng 135
+_ma 134
+n,_ 134
+awa 134
+st 134
+eka 133
+mi 133
+mu 132
+_T 132
+po 131
+ge 131
+ar_ 130
+id 129
+ko 129
+le 128
+_h 128
+ena 127
+_j 126
+emb 126
+ina 125
+_r 124
+itu_ 124
+ay 123
+ngg 123
+gg 123
+rang 123
+pi 120
+nan 120
+_ba 119
+_la 119
+apa 119
+_I 118
+p_ 118
+bi 117
+ai_ 117
+ta_ 116
+san 116
+Ke 116
+ro 115
+eri 114
+kar 113
+lah_ 113
+_itu_ 112
+aya 111
+i, 110
+an, 110
+ra_ 110
+_per 110
+im 110
+ika 109
+isi 109
+mem 109
+tah 108
+_Ke 108
+ian 108
+_mem 108
+akan 108
+Se 108
+to 107
+ab 107
+ngka 106
+rs 106
+gka 106
+uk_ 105
+seb 104
+_de 104
+pu 104
+i,_ 104
+ita 104
+nda 103
+_ti 103
+ni_ 103
+ca 103
+_Se 103
+ers 103
+pen 103
+ini 102
+an,_ 102
+angk 101
+uh 101
+han 101
+nta 100
+_in 99
+f 99
+nj 99
+ok 99
+aga 99
+_R 98
+as_ 98
+tr 98
+mer 97
+lam 97
+and 97
+end 96
+anga 96
+ne 96
+Sa 96
+ka_ 96
+arin 95
+gu 95
+_ha 94
+Z 94
+al_ 94
+ga_ 94
+_Z 93
+_pen 93
+dar 93
+Ad 93
+i._ 93
+ada_ 93
+atan 93
+tak 93
+i. 93
+ia_ 92
+asa 92
+ap_ 92
+ari_ 92
+kat 92
+_seb 92
+_Za 91
+Za 91
+den 91
+n. 90
+_u 90
+_Ad 90
+engan 89
+ib 89
+any 89
+n._ 89
+o_ 89
+Zar 88
+Zari 88
+rina 88
+_Zar 88
+_Zari 88
+Zarin 88
+arina 88
+ks 88
+angka 87
+oli 87
+eg 87
+kt 86
+_Sa 86
+hu 85
+ih 85
+us_ 85
+adi 85
+om 85
+eba 85
+anya 85
+_bu 84
+denga 83
+L 83
+ed 83
+dak 83
+deng 83
+ma_ 82
+asi_ 82
diff --git a/libtextcat/data/new_fingerprints/lm/irish_gaelic.lm b/libtextcat/data/new_fingerprints/lm/irish_gaelic.lm
new file mode 100644
index 000000000000..b6874862da01
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/irish_gaelic.lm
@@ -0,0 +1,400 @@
+_ 8010
+a 2622
+i 1573
+h 1334
+n 1247
+r 968
+_a 847
+e 830
+s 817
+t 747
+l 639
+c 636
+g 598
+o 590
+d 554
+n_ 501
+a_ 487
+m 432
+an 415
+u 413
+b 379
+h_ 352
+ai 350
+ch 350
+ea 346
+r_ 346
+í 334
+é 321
+_s 309
+á 306
+in 281
+ar 277
+. 269
+_d 266
+s_ 254
+ir 253
+_b 250
+f 250
+an_ 246
+, 241
+,_ 226
+ag 225
+_an 221
+bh 218
+_c 216
+._ 212
+ac 210
+ha 208
+_a_ 201
+" 199
+_m 199
+th 198
+_t 190
+ach 182
+_ag 180
+_an_ 179
+í_ 176
+_l 168
+na 168
+nn 160
+e_ 159
+ar_ 158
+_g 157
+ú 156
+_i 152
+il 150
+le 150
+is 143
+ó 142
+_bh 138
+ei 138
+g_ 135
+_f 135
+dh 135
+l_ 126
+t_ 125
+ig 123
+é_ 122
+_n 120
+gu 120
+á_ 120
+mh 118
+id 117
+ch_ 117
+ad 116
+he 114
+ir_ 114
+ra 109
+o_ 109
+ach_ 107
+ia 105
+_ar 105
+us 104
+ui 104
+_" 101
+us_ 100
+T 99
+am 99
+ta 98
+gus 98
+gus_ 98
+_le 97
+gh 97
+_ch 97
+agus 94
+agus_ 94
+agu 94
+éa 93
+_agus 93
+_agu 93
+ean 93
+na_ 92
+d_ 92
+ái 91
+p 89
+it 89
+A 89
+_ar_ 88
+rt 86
+al 85
+oi 84
+sa 84
+"_ 82
+hai 81
+_r 79
+nn_ 79
+hu 79
+as 79
+éi 78
+_T 78
+ma 77
+air 77
+at 77
+ann 76
+B 76
+sé 76
+hí 75
+igh 74
+st 74
+ga 73
+go 71
+ua 71
+ne 71
+la 71
+- 71
+de 71
+te 71
+re 70
+inn 70
+ith 69
+eac 69
+_sé 69
+in_ 68
+_go 68
+hi 68
+each 68
+dh_ 68
+si 67
+ag_ 67
+_go_ 66
+hea 66
+go_ 66
+tha 64
+om 64
+_sé_ 63
+sé_ 63
+hí_ 63
+on 62
+se 61
+úi 60
+nt 60
+C 60
+D 59
+i_ 58
+_ag_ 58
+is_ 58
+ío 58
+_de 57
+_B 56
+il_ 56
+or 56
+_th 54
+ca 53
+fa 53
+amh 53
+_A 53
+le_ 52
+? 52
+S 51
+io 51
+_in 51
+sí 51
+li 51
+rai 50
+hf 50
+ht 50
+eo 50
+sc 50
+ri 49
+: 49
+igh_ 49
+gh_ 49
+_sí 49
+:_ 49
+há 49
+_D 49
+be 49
+aig 49
+hé 48
+oc 48
+idh 48
+rt_ 48
+ho 47
+os 47
+ann_ 47
+_C 46
+! 46
+Bh 46
+bhf 45
+_si 45
+lt 45
+_bhf 45
+irt 45
+ear 44
+_na 44
+ta_ 44
+air_ 44
+_p 44
+im 44
+aga 44
+_ma 44
+_S 44
+aigh 43
+án 43
+_dh 43
+uai 43
+ao 43
+cht 43
+ain 42
+bhe 42
+ait 42
+fh 42
+sa_ 41
+m_ 41
+adh 41
+ile 41
+_é 41
+ail 41
+eir 41
+ói 41
+_Bh 40
+as_ 40
+cha 40
+idh_ 40
+hái 39
+_i_ 39
+bh_ 39
+th_ 39
+ad_ 39
+och 39
+mh_ 39
+tr 39
+rea 38
+_se 38
+ro 38
+rí 38
+hair 38
+_is 38
+uil 37
+iú 37
+áin 37
+I 37
+ll 37
+mé 37
+_be 36
+ba 36
+eann 36
+tá 36
+_o 36
+M 36
+aid 36
+aith 36
+ib 36
+' 36
+tea 36
+_mé 35
+chu 35
+ibh 35
+each_ 35
+ean_ 34
+irt_ 34
+_na_ 34
+N 34
+ist 34
+fu 34
+mha 34
+bea 34
+h. 34
+_bhe 34
+lá 34
+ic 34
+_sí_ 33
+eis 33
+bhí 33
+ni 33
+héa 33
+_sa 33
+ith_ 33
+sí_ 33
+har 33
+_bhí 33
+ig_ 32
+ur 32
+aí 32
+hr 32
+_am 32
+_bhí_ 31
+da 31
+úir 31
+hfu 31
+_chu 31
+ol 31
+ne_ 31
+_fa 31
+An 31
+Bhí_ 31
+Bhí 31
+n, 31
+_ac 31
+bhí_ 31
+_bhfu 30
+_ó 30
+ana 30
+_M 30
+mé_ 30
+_fh 30
+aigh_ 30
+bhfu 30
+_mé_ 30
+tú 29
+_le_ 29
+tá_ 29
+hean 29
+háin 29
+sin 29
+eir_ 29
+nne 29
+cé 29
+_aga 29
+h._ 29
+Tá 29
+ibh_ 29
+iste 28
+An_ 28
+do 28
+hui 28
+fui 28
+ní 28
+ste 28
+acht 28
+n,_ 28
+co 28
+dea 28
+ng 28
+nach 28
+id_ 28
+hfui 28
+.. 28
+lei 28
+nac 28
+ce 27
+a. 27
+c_ 27
+lea 27
+hfuil 27
+_Bhí_ 27
+_bea 27
+adh_ 27
+di 27
+fuil 27
+." 27
+Tá_ 27
+ha_ 27
+ú_ 27
+uil_ 27
+."_ 27
+bhfui 27
+_Bhí 27
+éan 27
+_do 27
+lta 27
+aoi 27
+_lei 27
+_mh 26
+dú 26
+fuil_ 26
+eat 26
+-_ 26
+teac 26
+ath 26
diff --git a/libtextcat/data/new_fingerprints/lm/italian.lm b/libtextcat/data/new_fingerprints/lm/italian.lm
new file mode 100644
index 000000000000..543cadcfa88e
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/italian.lm
@@ -0,0 +1,400 @@
+_ 25028
+a 7570
+e 6477
+i 5481
+o 5104
+l 3905
+n 3866
+r 3502
+t 2934
+c 2862
+s 2862
+a_ 2504
+e_ 2404
+d 2004
+i_ 1749
+o_ 1679
+u 1650
+v 1611
+p 1561
+m 1414
+_c 1325
+, 1192
+,_ 1192
+_s 1190
+_d 1094
+g 1067
+an 925
+er 915
+_a 914
+_p 895
+la 858
+_l 830
+re 799
+ar 769
+h 762
+no 753
+co 726
+va 698
+_e 657
+n_ 656
+on 656
+ra 653
+to 651
+f 638
+di 638
+_i 634
+ch 634
+ll 633
+l_ 624
+la_ 598
+ta 593
+el 576
+in 567
+_m 558
+en 529
+b 528
+ri 525
+_co 523
+_n 523
+_di 522
+li 513
+av 507
+al 501
+le 494
+ia 492
+se 484
+ol 479
+_f 477
+or 477
+te 469
+_e_ 467
+ve 454
+at 449
+de 447
+. 443
+ne 429
+va_ 428
+ca 426
+._ 422
+tt 422
+re_ 415
+nt 415
+io 411
+_v 407
+pe 405
+z 392
+to_ 391
+_ch 389
+na 384
+si 384
+' 383
+he 382
+no_ 379
+ci 374
+_la 373
+ro 371
+_g 370
+st 368
+cc 366
+he_ 362
+di_ 362
+ma 358
+ev 354
+che 354
+es 352
+me 352
+pa 351
+_t 349
+ti 348
+_di_ 347
+ss 345
+che_ 344
+a,_ 337
+a, 337
+nd 335
+o, 333
+o,_ 333
+ell 330
+gl 323
+sa 322
+il 322
+gli 321
+da 318
+as 318
+do 314
+_che 308
+_che_ 306
+eva 306
+_la_ 300
+lla 298
+le_ 293
+un 291
+_pe 290
+_de 288
+q 283
+qu 283
+ava 280
+po 277
+on_ 275
+r_ 273
+li_ 273
+_b 269
+_il 268
+_il_ 268
+il_ 268
+lo 267
+om 263
+e, 263
+e,_ 263
+ni 258
+tr 258
+so 255
+ra_ 253
+os 251
+_in 249
+_u 248
+per 244
+are 243
+et 243
+_se 240
+ano 239
+si_ 238
+_ca 238
+_qu 238
+lla_ 238
+_q 238
+_a_ 236
+ac 236
+_r 234
+ic 233
+_no 232
+ie 227
+fa 227
+hi 226
+del 225
+ua 222
+_per 218
+ce 218
+_ma 216
+sc 216
+_del 215
+mi 212
+_un 208
+chi 206
+era 205
+i, 205
+i,_ 205
+su 203
+and 202
+vo 202
+_fa 201
+eva_ 200
+ano_ 199
+gli_ 197
+non 196
+pi 196
+vi 195
+er_ 195
+_al 194
+se_ 193
+_ne 192
+_non 191
+am 190
+is 187
+ava_ 187
+_non_ 186
+non_ 186
+in_ 185
+ent 185
+_si 184
+_pa 184
+com 183
+! 182
+_le 182
+_su 181
+uo 181
+el_ 180
+!_ 180
+l' 178
+ue 177
+te_ 177
+_com 177
+are_ 176
+pr 176
+_in_ 176
+van 172
+mo 172
+ta_ 171
+gn 167
+ere 166
+na_ 166
+tto 163
+it 161
+_per_ 161
+per_ 161
+é 161
+all 160
+ess 159
+ut 159
+col 158
+acc 157
+gi 155
+lo_ 154
+oc 154
+vano 153
+io_ 153
+_av 151
+ndo 151
+é_ 151
+ato 149
+ave 148
+_st 147
+me_ 147
+'a 146
+ia_ 144
+con 143
+mp 143
+fi 142
+ett 142
+_si_ 141
+_pi 140
+era_ 140
+ti_ 140
+ó 140
+vano_ 140
+_gl 139
+qua 139
+ella 139
+sta 138
+ome 137
+S 137
+_gli 137
+_S 137
+ad 136
+_ve 134
+ant 134
+ne_ 134
+ó_ 133
+sp 133
+do_ 133
+_po 132
+ro_ 132
+ov 132
+_le_ 131
+ella_ 130
+sse 129
+_con 128
+ir 128
+_vi 128
+ig 127
+_gli_ 127
+_ave 127
+vev 127
+un_ 126
+ot 126
+veva 125
+dell 125
+que 125
+a. 125
+_o 125
+a._ 124
+tu 124
+cia 123
+za 123
+_que 123
+_da 121
+par 121
+_pr 120
+cch 120
+_dell 120
+eg 119
+_sa 119
+o._ 119
+o. 119
+_col 118
+lt 118
+_un_ 118
+rt 118
+ur 117
+_vo 117
+_me 117
+ome_ 117
+L 116
+ap 116
+_L 116
+zi 116
+nto 116
+og 115
+_an 115
+_so 115
+em 114
+ag 114
+be 111
+ni_ 111
+im 110
+cchi 110
+ver 110
+lle 109
+nz 109
+cci 109
+_ri 109
+nc 108
+_er 108
+come_ 107
+come 107
+aveva 107
+ui 107
+avev 107
+tto_ 107
+_come 106
+ed 106
+P 105
+man 105
+_P 105
+rs 105
+occ 104
+ndo_ 103
+ato_ 103
+_qua 103
+_era 103
+ari 102
+ba 100
+_mo 100
+nel 100
+id 99
+men 98
+_fi 98
+_all 98
+rr 97
+_do 97
+_avev 97
+att 97
+l'a 96
+ei 96
+zz 96
+; 96
+vol 95
+pp 95
+tra 95
+;_ 95
+ere_ 94
+lle_ 94
+nda 94
+utt 94
+est 93
+_nel 93
+ul 92
+ola 92
+iv 92
+ando 90
+ale 90
+lu 90
+rn 90
+e. 89
+e._ 89
+ll' 89
+tta 88
+nte 87
+_l' 87
+uel 87
diff --git a/libtextcat/data/new_fingerprints/lm/japanese.lm b/libtextcat/data/new_fingerprints/lm/japanese.lm
new file mode 100644
index 000000000000..654341bfeae2
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/japanese.lm
@@ -0,0 +1,400 @@
+_
+ã®
+ã€
+ã«
+ã‚’
+ã¨
+ã—
+ãŸ
+ãª
+ã¦
+。
+ã§
+ã‚‹
+ã‹
+ã™
+ã¯
+ã‚Š
+ã„
+ã¾
+ら
+ãŒ
+ 
+。_
+ã£
+ã‚‚
+_ 
+ã—ã¦
+ã™ã€‚
+1
+地
+ã‹ã‚‰
+ã†
+ã 
+ã¾ã™
+å¹´
+ー
+中
+れ
+ã‚
+ã¾ã™ã€‚
+ã€
+ãŸã€‚
+大
+分
+「
+ス
+ã—ãŸ
+ã‘
+ã“
+人
+国
+政
+ã‚“
+å­¦
+ã£ã¦
+æ–¹
+ã‚Šã¾
+ã£ãŸ
+ã™ã‚‹
+改é©
+é©
+改
+çš„
+ã¯ã€
+ã¡
+ã•
+ï¼’
+ã‚
+ã¦ã€
+部
+ã©
+生
+ã¦ã„
+ã‚¿
+会
+ン
+ï¼™
+よ
+果
+ã›
+ã‚
+ã‚‹ã¨
+地方
+ã«ã€
+行
+ã‚„
+ã™ã€‚_
+3
+力
+自
+ã¨ã—
+レ
+ç«‹
+)
+ã¨ã—ã¦
+_)
+ãªã‚Š
+ã§ã
+進
+月
+æ–°
+ã‚Šã¾ã™
+æ°´
+åŒ
+女
+下
+ã‚Šã¾ã™ã€‚
+作
+(_
+ã“ã¨
+実
+å½¹
+権
+ã„ãŸ
+(
+ã—ã¦ã„
+ル
+ク
+(
+ã¿
+ニ
+ã­
+ã¤
+ãã‚‹
+経
+ï¼–
+時
+å¼·
+家
+性
+5
+内
+調
+集
+る。
+上
+財
+改é©(
+安
+çš„ãª
+事
+å‹™
+ã€ã¨
+一
+ã£ãŸã€‚
+å­
+å…¨
+ã§ãã‚‹
+å·
+ãŸã€‚_
+次
+業
+ãªã„
+ド
+)
+ç¾
+é©(_
+ï¼
+る。_
+改é©(_
+é©(
+ã§ã™
+ã§ã€
+マ
+ミ
+ジ
+社
+ã§ã‚‚
+ç›®
+å¹´ã‹ã‚‰
+発
+çœ
+ã€ä¸­
+ã‚¢
+ã¹
+ッ
+ã°
+済
+女性
+ï¼…
+法
+ãªã‚“
+ãˆ
+ç”»
+地方分権
+ãªã
+æ°‘
+構
+æ ¹
+ã—ã€
+ãŒã€
+高
+推
+æ–½
+ã¨ã„
+何
+é›»
+調査
+éš›
+ã‚‚ã€
+和
+分権
+効
+é™
+地方分
+国ã®
+é•·
+経済
+設
+計
+方分権
+野
+é‡
+å°
+ã«ãª
+構造
+本
+æ ¡
+査
+ã—ã¾
+造
+ã¾ã™ã€‚_
+ãªã‚Šã¾
+ã‚ŒãŸ
+方分
+ã‚ã‚Š
+財政
+ç†
+ãª_
+ã—ãŸã€‚
+å¹´ã‹
+ç´„
+_ã¦
+1年
+よã†
+ç§
+・
+メ
+育
+ã‚»
+ビ
+ã‚·
+ã—ã¦ã€
+ナ
+ã‚«
+ト
+生ã®
+ç€
+ã»
+æµ
+構造改
+ã‚“ã¦
+ãƒ
+ã‚
+èŒ
+æ²»
+時ã«
+ï¼…ã€
+æ°—
+ãŠ
+ドレ
+ãªã£
+ãªã©
+ãŸã‚Š
+何ã‹
+ã‚‹ã“
+ã€ãã—ã¦
+ã¦ã
+ãŸã¡
+ã§ã¯
+ã¦ã¯
+進ã‚
+造改
+構造改é©(
+ã®å­
+率
+期
+度
+æ—¥
+部çœ
+計画
+ã_
+æ—
+æ–‡
+構造改é©
+ã¨ã„ã†
+ã—ã¾ã™
+ãªã£ãŸ
+å±€
+踊り
+ãªã‚“ã¦
+æ­£
+夫
+増
+多
+食
+使
+ã ã£ãŸ
+戻
+ããª
+ã„ã¾
+体
+スを
+題
+ãŸãŒã€
+ã„ã‚‹
+ã„ã†
+身
+ã‚‚ãª
+ã„。
+中部
+ã‹ã‘
+踊
+_ã™
+é¢
+_ã«
+ã ã£ãŸã€‚
+çš„ã«
+é–€
+ドレス
+レス
+ï¼’å¹´
+ã¾ã—ãŸ
+制
+åˆ
+ã—ã¾ã™ã€‚
+ã¾ã§
+ã¾ã—
+ã‚ã‚Šã¾
+ãã€
+冬
+ã¨ã‚’
+文部çœ
+を進ã‚
+推進
+å…ƒ
+ãã—
+é ƒ
+è¾²
+å…¥
+域
+解
+ã¦ã„ã‚‹
+らã€
+ã€ãã—
+ã¨ã€
+造改é©
+家æ—
+見
+è¦
+ã—ãŸã€‚_
+è¦
+8
+5年
+ã‚ã‚Šã¾ã™ã€‚
+葉
+ï¼ï¼…
+造改é©(
+ã‚ã‚Šã¾ã™
+å…¬
+ã¯ãª
+ã«ã‚
+ã—ãª
+çµæžœ
+表
+ã‚ãŸ
+ãã—ã¦
+ã ã‘
+ã ã£
+_ã™ã€‚
+ï¼”
+第
+ã“ã¨ã‚’
+ã„ã¾ã™
+文部
+ã®ç›®
+ã€ã
+を進
+効果
+ã‚‹ã“ã¨
+ã®ã¾
+ã‚ã‚‹
+3ï¼
+ã€åœ°
+自分
+組
+çµ
+ 「
+ã«ã‚‚
+ãŸãŒ
+造改é©(_
+ã‚‹é™ã‚Š
+ã女性ãŸ
+ナ増強
+戻る。
+ã£ãŸã€‚_
+ç¾ã™
diff --git a/libtextcat/data/new_fingerprints/lm/korean.lm b/libtextcat/data/new_fingerprints/lm/korean.lm
new file mode 100644
index 000000000000..159493270c3f
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/korean.lm
@@ -0,0 +1,400 @@
+_ 11636
+À 2659
+° 1629
+Ç 1578
+¸ 1458
+¿ 1397
+´ 1299
+µ 1118
+Ã 1005
+± 928
+» 849
+¼ 840
+_À 809
+¹ 808
+º 780
+¡ 773
+Ã 764
+³ 674
+¾ 640
+Ì 591
+ÃŽ 580
+ÀÌ 538
+½ 526
+Ã 519
+Â 518
+_° 518
+Ù 506
+· 483
+È 464
+ö 463
+­ 454
+´Ù 451
+ø 404
+Æ 395
+ë 384
+_¿ 382
+í 377
+. 374
+Ã… 373
+ç 358
+._ 347
+_Ç 344
+´Â 338
+¡_ 333
+Â_ 329
+¿¡ 327
+_Ã 319
+´Â_ 317
+´Ù. 313
+Ù. 313
+â 308
+¶ 308
+´Ù._ 301
+Ù._ 301
+»_ 299
+_¹ 291
+î 288
+® 282
+À» 279
+Ö 278
+¦ 274
+À»_ 271
+ÃŽ_ 269
+, 266
+Ñ 266
+°í 261
+Ãö 258
+,_ 258
+°¡ 250
+ß 248
+_¼ 246
+Ä 245
+ÀÇ 244
+Ì_ 243
+_¸ 243
+_µ 242
+ü 238
+é 235
+¼­ 234
+ÀÌ_ 234
+Çà 234
+_± 233
+´ë 228
+·Î 227
+ÇÑ 222
+½Ã 222
+_³ 215
+±â 212
+_Ã 210
+í_ 208
+_´ 206
+¦_ 206
+Ç_ 205
+_¾ 205
+º_ 204
+÷ 198
+˼ 198
+°í_ 194
+·Î_ 193
+ó 193
+¿¡_ 193
+¸¦_ 192
+¸¦ 192
+_ÀÌ 190
+­_ 189
+˼_ 189
+_½ 189
+µµ 188
+Àà 188
+ÀÇ_ 188
+_º 186
+Ã 177
+Ú 175
+ú 171
+ȍ 161
+Ô 154
+_Àà 153
+Ø 152
+¿ø 151
+±¸ 150
+µ¿ 147
+Ã’ 147
+¸¸ 145
+¼ö 143
+¤ 142
+ºÎ 142
+_» 141
+ÀÖ 140
+æ 139
+Ã 138
+ù 138
+ª 137
+µé 136
+è 134
+À¸ 134
+_ÀÖ 133
+² 132
+Ñ_ 127
+ÀÎ 125
+°¡_ 123
+ÀÚ 122
+ÇÑ_ 121
+Àü 121
+¾Æ 118
+ý 117
+Ã¥ 116
+Ã_ 115
+¾î 115
+µ_ 115
+¢ 114
+Ö´ 113
+°ú 112
+¸· 111
+¯ 109
+ÇØ 109
+§ 108
+à 108
+_È 108
+±¹ 107
+¼­_ 107
+× 107
+£ 106
+ÀÖ´ 105
+ȸ 103
+¸® 101
+ö_ 101
+û 100
+_ÀÖ´ 100
+õ 100
+¸·Î 100
+" 98
+À¸·Î 97
+ÃÖ 97
+À¸· 97
+Àå 95
+_´ë 94
+¡¼ 94
+³ª 94
+_½Ã 94
+Â¥ 94
+µî 94
+ä 92
+°ø 92
+_Ã… 91
+á 91
+ð 90
+Çà 90
+¸·Î_ 90
+Ê 89
+© 89
+Ã_ 89
+¸_ 89
+À¸·Î_ 89
+éÀ 89
+Þ 88
+¡¼­ 88
+¿¡¼ 88
+¿¡¼­ 88
+ê 87
+µµ_ 87
+±³ 85
+_Æ 85
+î_ 84
+°æ 84
+µéÀ 84
+¸í 84
+ëÇ 83
+¿ù 83
+_Ãö 82
+Çð 82
+¿À 81
+¶ó 80
+¿¡¼­_ 79
+¡¼­_ 79
+ÀÃ_ 78
+Ã 78
+æ 78
+ç_ 78
+øÀ 78
+' 77
+Çß 77
+ì 77
+Ü 77
+Ãß 77
+ú_ 76
+Ãö_ 76
+Ó 76
+â_ 75
+( 74
+) 74
+»ó 74
+°ü 74
+»ý 73
+_¿ù 72
+_°¡ 72
+_¼ö 72
+- 72
+¿© 72
+Ö´Ù 72
+º¸ 71
+ÀÖ´Ù 71
+Ȑ 70
+°³ 70
+½º 70
+¼± 69
+¿ë 69
+°ú_ 69
+_ÀÖ´Ù 69
+_µî 69
+ø_ 69
+ß´ 69
+°à 68
+³» 68
+_¡ 68
+_ÇÑ 68
+ù_ 67
+ü 67
+¸¿ 66
+_ÀÃ_ 66
+Çß´ 66
+°Ô 65
+ñ 65
+_¶ 65
+_°à 65
+Çß´Ù 64
+ß´Ù 64
+_Çà 63
+¹Î 62
+« 62
+ô 62
+¼Ò 62
+¿ù_ 62
+ö 61
+³â 61
+ðí 61
+Ì´ 60
+¾È 60
+Çðí 60
+õ 59
+_¿ù_ 59
+_¼­ 59
+Åë 59
+ÆÄ 58
+®_ 58
+_¸¸ 58
+Ø_ 58
+´ç 57
+ß´Ù. 57
+¬ 57
+Õ 57
+Çß´Ù. 57
+¹° 57
+¾÷ 57
+Ö´Ù. 56
+ß_ 56
+ÀÖ´Ù. 56
+¿à 56
+ß´Ù._ 56
+Ãø 56
+Àç 55
+¸° 55
+ÃÀ 55
+Æ® 55
+¹é 55
+ÀÌ´ 55
+Ö´Ù._ 54
+_¹é 54
+¿Ã_ 53
+Àû 53
+ð 53
+¹® 53
+_ÃÖ 53
+´Ü 53
+¼º 53
+ÇØ_ 53
+°Ç 53
+íÀ 52
+_Àü 52
+Ô_ 52
+¿¬ 52
+_°æ 51
+°ÃÀ 51
+°£ 51
+¿ì 51
+È­ 51
+ä 50
+_¾Æ 50
+´ëÇ 50
+Ä¡ 50
+ðí_ 49
+Çðí_ 49
+_ÀÎ 49
+Û 49
+É 49
+_" 48
+öµ 48
+ˤ 48
+ÀÌ´Ù 48
+ÀÔ 48
+_°ÃÀ 48
+Ì´Ù 48
+¡­ 47
+Ì´Ù. 47
+_°ø 47
+ÀÌ´Ù. 47
+°Ô_ 47
+_ȍ 47
+¸ç 47
+½Å 47
+Ã_ 46
+Ã…Ã 46
+é_ 46
+Ã’_ 46
+ï 46
+Çô 46
+_°³ 45
+_´Ù 45
+_ÀÚ 45
+¸¶ 45
+°è 45
+Çà 45
+Ì´Ù._ 45
+÷À 44
+îà 44
+ÈÄ 44
+±â_ 43
+(_ 43
+¸é 43
+­¿ 43
+ü_ 43
+ºñ 42
+­´ 42
+ë_ 42
+ÀÎ_ 42
+_ÀÌ_ 42
+µî_ 42
+°­ 42
+ÎÃ 41
+µÇ 41
+¿øÀ 41
+¿µ 41
+À½ 40
+Îõ 40
+±× 40
+Ë 40
+¿¹ 40
+_¿À 40
+øÇ 40
+¸» 40
+_Çà 40
+¡­_ 39
+_³â 39
+³² 39
+ÇÒ 39
+¿_ 39
+_³ª 39
+³­ 38
+¸íÀ 38
+âÀ 38
+ª_ 38
diff --git a/libtextcat/data/new_fingerprints/lm/latin.lm b/libtextcat/data/new_fingerprints/lm/latin.lm
new file mode 100644
index 000000000000..177cd35aa174
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/latin.lm
@@ -0,0 +1,400 @@
+_ 20136
+e 6892
+i 5604
+a 5443
+u 4581
+t 4552
+s 4354
+r 3923
+n 3375
+m 3063
+o 2921
+c 2224
+l 1805
+e_ 1625
+s_ 1503
+p 1424
+d 1397
+, 1285
+,_ 1276
+er 1077
+qu 1028
+q 1028
+a_ 1019
+t_ 1018
+is 942
+_a 921
+re 902
+m_ 891
+v 858
+b 821
+um 808
+_s 773
+us 772
+en 766
+nt 733
+in 729
+ue 727
+te 720
+g 718
+_i 710
+_p 679
+it 676
+_c 669
+et 653
+que 652
+_e 643
+at 643
+ue_ 616
+ra 614
+que_ 611
+f 601
+or 598
+ri 576
+ti 572
+ta 559
+tu 552
+an 551
+ae 527
+_m 513
+am 501
+_t 493
+us_ 488
+is_ 487
+es 479
+em 479
+_f 451
+um_ 443
+_v 442
+ia 442
+li 438
+_d 436
+. 432
+i_ 430
+et_ 429
+ni 412
+ne 409
+h 406
+de 404
+ur 396
+._ 392
+ar 388
+os 388
+mi 382
+pe 382
+la 376
+st 371
+s, 368
+di 367
+_et 366
+s,_ 365
+_in 363
+on 360
+o_ 359
+_n 351
+_et_ 351
+as 346
+im 336
+na 327
+se 320
+ma 315
+cu 307
+vi 306
+si 303
+ro 303
+r_ 302
+su 299
+un 295
+_l 291
+to 291
+ec 290
+ci 288
+co 287
+_r 287
+ere 286
+ce 284
+tr 280
+re_ 278
+ent 275
+x 275
+ct 274
+ve 271
+ru 259
+ul 256
+me 255
+ui 255
+c_ 252
+_o 250
+ic 249
+ns 247
+_qu 242
+_q 242
+no 241
+ant 235
+am_ 235
+_co 233
+sa 231
+ca 230
+t, 226
+mu 225
+t,_ 225
+_re 223
+el 222
+ib 222
+id 218
+om 212
+_te 211
+al 209
+le 209
+it_ 208
+mo 208
+ol 206
+_u 203
+; 199
+_h 199
+ac 198
+;_ 198
+bu 197
+nu 196
+ua 195
+n_ 195
+ll 194
+tis 191
+A 189
+rt 188
+ge 188
+nd 187
+au 187
+lu 186
+iu 185
+squ 185
+per 185
+sq 185
+ter 185
+pa 183
+_A 183
+em_ 183
+ia_ 180
+ed 179
+_pe 178
+m, 176
+sque 175
+_su 175
+ae_ 175
+m,_ 175
+pr 175
+bi 175
+bus 174
+_vi 174
+os_ 173
+ta_ 172
+mqu 171
+mq 171
+ss 170
+sque_ 169
+ibu 167
+ad 166
+ibus 165
+I 164
+nte 163
+ra_ 163
+mque 162
+_de 162
+po 161
+_se 160
+ere_ 160
+nc 160
+qua 159
+T 159
+lo 157
+oc 156
+mque_ 156
+_T 155
+_pa 155
+_pr 155
+tem 154
+bus_ 152
+nti 149
+rum 149
+er_ 149
+ab 148
+ir 148
+da 147
+_ve 146
+ibus_ 146
+ex 146
+ut 145
+pi 145
+tur 145
+_ca 143
+_me 142
+es_ 142
+gi 142
+te_ 141
+_I 141
+vo 141
+do 141
+_si 140
+tus 139
+il 137
+_ar 136
+du 133
+nt_ 133
+uc 133
+fa 132
+as_ 132
+rr 131
+ba 130
+_ad 128
+ne_ 127
+_ma 127
+ens 127
+gn 126
+s. 126
+y 126
+min 125
+ris 124
+in_ 123
+tum 123
+P 123
+_g 123
+mp 123
+e, 122
+io 122
+_P 122
+ea 122
+hi 122
+e,_ 121
+era 120
+sc 120
+_la 120
+qui 120
+unt 120
+fe 119
+_in_ 118
+_no 118
+ore 118
+iam 118
+va 117
+tis_ 117
+s._ 117
+at_ 117
+eri 116
+d_ 116
+con 115
+fu 115
+pu 114
+cum 114
+ub 114
+ng 114
+ine 113
+_au 113
+: 113
+_di 112
+ag 111
+_con 111
+ect 111
+i, 111
+equ 111
+i,_ 111
+be 111
+eq 111
+_po 110
+so 110
+:_ 110
+nis 109
+ha 109
+uo 109
+_fa 108
+na_ 107
+ip 107
+is, 107
+_cu 106
+cr 106
+ate 105
+is,_ 105
+ig 105
+tor 105
+rat 104
+_qua 103
+eg 103
+a, 103
+a,_ 102
+tra 102
+_mo 101
+sp 101
+mis 100
+itu 100
+D 99
+ali 99
+eb 99
+eni 99
+_sa 98
+ie 98
+imu 98
+_ex 97
+_D 96
+res 95
+est 94
+tri 94
+ene 94
+_mi 94
+str 94
+enti 93
+t. 92
+av 92
+_per 91
+ur_ 91
+ora 91
+lt 91
+umqu 90
+_vo 90
+umq 90
+up 89
+t._ 88
+quo 88
+_ne 88
+gen 88
+rum_ 87
+tqu 87
+tq 87
+_fu 86
+ep 86
+ma_ 86
+umque 86
+it,_ 85
+ine_ 85
+it, 85
+men 85
+mus 84
+ort 83
+ven 83
+ina 83
+us,_ 83
+us, 83
+tque 82
+_ge 82
+per_ 82
+mor 82
+inc 82
+are 81
+tus_ 81
+_an 81
+rim 81
+tque_ 81
+ot 81
+ani 80
+H 80
+_tu 80
+ho 80
+tem_ 80
+u_ 80
+ser 79
+um,_ 79
+um, 79
+S 79
+ten 79
+ver 79
+sti 79
+ntu 78
+fer 78
diff --git a/libtextcat/data/new_fingerprints/lm/latvian.lm b/libtextcat/data/new_fingerprints/lm/latvian.lm
new file mode 100644
index 000000000000..bffdd309b9bb
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/latvian.lm
@@ -0,0 +1,400 @@
+_ 24504
+a 8735
+i 7353
+s 6064
+t 5176
+e 4390
+r 4339
+u 4200
+n 3422
+â 3055
+m 2817
+p 2755
+k 2703
+o 2643
+s_ 2595
+d 2131
+l 2035
+j 1972
+î 1958
+ie 1887
+v 1822
+as 1530
+u_ 1396
+_p 1380
+b 1357
+z 1311
+ç 1267
+a_ 1261
+as_ 1217
+. 1065
+, 1048
+,_ 1033
+g 1010
+ar 970
+ð 965
+ti 962
+c 922
+_a 876
+ja 838
+st 828
+_i 822
+_v 805
+pa 792
+_t 775
+._ 774
+um 760
+_k 749
+â_ 734
+_n 728
+es 712
+i_ 706
+at 703
+is 695
+_s 680
+ai 649
+ta 635
+an 631
+ka 621
+r_ 615
+îb 586
+ij 585
+_u 568
+_pa 548
+un 541
+no 539
+va 537
+ma 525
+ra 522
+ri 515
+iz 512
+in 509
+vi 508
+pr 506
+sa 487
+ju 486
+tâ 483
+ik 473
+n_ 472
+am 454
+en 452
+tie 440
+na 432
+ða 430
+tu 420
+ija 420
+ir 419
+o_ 415
+m_ 414
+li 411
+ci 409
+ku 404
+mi 401
+_no 398
+ek 398
+_un 397
+t_ 395
+_l 394
+un_ 393
+_d 393
+ot 392
+_un_ 392
+ies 391
+ar_ 388
+nâ 387
+da 387
+ei 385
+em 382
+_ti 382
+pi 380
+al 368
+ba 356
+ts 355
+_iz 353
+ap 350
+jas 349
+ko 346
+au 345
+ðan 339
+re 339
+ni 337
+kâ 328
+ro 327
+et 326
+rî 325
+ga 323
+mu 322
+jas_ 321
+tr 317
+_va 312
+te 310
+_pr 307
+sp 298
+sk 297
+ne 295
+îba 293
+âs 289
+ijas 287
+_ar 283
+la 283
+to 282
+ad 280
+dz 278
+îg 277
+tî 277
+çj 277
+_sa 277
+jâ 274
+_m 273
+ties 273
+lî 272
+ijas_ 269
+_at 269
+ed 267
+_pi 267
+ò 267
+s, 265
+s,_ 265
+çr 261
+gu 258
+f 258
+si 257
+mâ 256
+nt 254
+tu_ 254
+_r 253
+ru 252
+âj 251
+jum 251
+âs_ 250
+ât 249
+iem 248
+_b 248
+_vi 245
+par 244
+pie 244
+rt 243
+vç 242
+_ne 240
+âr 240
+ai_ 239
+_. 239
+_j 237
+us 237
+_ka 236
+er 232
+bas 232
+_tie 232
+_par 230
+di 229
+û 228
+ms 228
+âd 226
+îbas 224
+om 220
+bu 219
+umu 217
+iek 216
+kt 216
+it 216
+râ 215
+_ko 214
+ana 211
+âk 211
+or 210
+de 210
+inâ 207
+dî 204
+ur 203
+pri 203
+_c 202
+ta_ 202
+bas_ 201
+es_ 201
+_pie 200
+il 200
+os 199
+ðana 199
+id 198
+L 198
+_ie 197
+bi 194
+ak 194
+ja_ 194
+îbas_ 193
+çt 192
+îbu 192
+_g 191
+ï 189
+ma_ 189
+ien 188
+kum 187
+— 187
+_ir 186
+on 186
+_ties 185
+zî 185
+ce 185
+ied 184
+ist 183
+ts_ 183
+_— 183
+—_ 182
+âm 181
+vie 180
+_—_ 180
+cij 180
+ka_ 179
+_ap 175
+sî 174
+ir_ 174
+zi 174
+uma 173
+âl 172
+_ir_ 172
+oð 171
+_da 169
+î_ 168
+arî 168
+) 167
+do 166
+ve 166
+( 165
+ls 165
+bu_ 164
+val 164
+mu_ 164
+isk 163
+uz 161
+av 160
+par_ 160
+_vç 159
+_par_ 158
+vçr 158
+_ar_ 157
+rie 157
+_uz 157
+aj 156
+îbu_ 156
+oj 155
+kâ_ 155
+ld 154
+iet 154
+iku 154
+ks 153
+du 153
+ep 153
+ms_ 152
+ec 152
+V 151
+ais 150
+str 150
+nie 149
+am_ 149
+ums 148
+_( 147
+ju_ 146
+z_ 142
+ru_ 142
+îj 142
+gum 142
+u,_ 141
+u, 141
+iò 141
+uma_ 141
+çrt 141
+îgu 141
+me 140
+dâ 140
+ît 140
+ent 139
+ikum 138
+pâ 138
+em_ 138
+_L 138
+lie 136
+sta 136
+rî_ 136
+lst 136
+eik 135
+se 135
+s. 134
+pro 134
+rs 134
+s._ 134
+tik 134
+lç 134
+ska 133
+pap 133
+kas 133
+rm 133
+âju 133
+pî 132
+nu 132
+T 132
+pç 131
+tei 131
+nas 131
+_V 131
+jo 131
+lîg 131
+ut 131
+iem_ 130
+ras 129
+pu 129
+_li 129
+_ga 128
+dar 128
+_kâ 128
+umu_ 127
+îv 126
+îgum 126
+kas_ 126
+tîb 125
+vai 125
+lîgu 125
+P 125
+arî_ 125
+ev 124
+lai 124
+îr 124
+lîgum 124
+tv 123
+að 123
+_arî_ 122
+_arî 122
+_vie 121
+S 121
+_T 120
+îju 120
+teik 120
+cija 119
+rb 119
+_ð 119
+jâ_ 119
+nas_ 118
+tâj 118
+vçrt 118
+iec 118
+_la 117
+îd 116
+_vai 116
+îjum 116
+vien 116
+als 116
+_lî 116
+apîr 115
+rts 115
+isi 115
+pîr 115
+papî 115
+papîr 115
+apî 115
+tsp 115
+mç 114
+im 114
+be 114
+is_ 114
+alst 114
+_ja 114
+rtspa 113
diff --git a/libtextcat/data/new_fingerprints/lm/lithuanian.lm b/libtextcat/data/new_fingerprints/lm/lithuanian.lm
new file mode 100644
index 000000000000..eca25a45b2ba
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/lithuanian.lm
@@ -0,0 +1,400 @@
+_ 23070
+i 8196
+a 7763
+s 5179
+e 3458
+o 3425
+u 3371
+t 3344
+r 3240
+n 3070
+k 3019
+l 2060
+p 1889
+s_ 1756
+m 1680
+d 1640
+v 1590
+, 1501
+,_ 1497
+ó 1400
+g 1302
+j 1200
+o_ 1138
+. 1128
+ai 1113
+÷ 1075
+_p 1047
+_k 1035
+i_ 990
+._ 984
+as 975
+b 937
+au 903
+is 863
+_n 815
+ka 811
+si 804
+_s 799
+ia 796
+ý 759
+ta 735
+ie 731
+ti 728
+y 721
+_t 711
+in 697
+_i 686
+us 675
+pa 658
+ir 640
+ar 633
+ù 620
+_a 617
+r_ 609
+ne 583
+a_ 578
+ri 569
+_v 558
+_pa 541
+al 540
+ra 531
+li 522
+Ã 509
+u_ 493
+vi 490
+_ka 489
+da 487
+_ne 482
+_j 481
+en 478
+ù_ 445
+os 444
+ki 442
+õ 441
+an 440
+ik 434
+ma 433
+as_ 430
+st 420
+_d 420
+ai_ 417
+s,_ 396
+am 396
+is_ 396
+s, 396
+_ir 390
+jo 387
+ir_ 385
+at 384
+_ir_ 383
+i÷ 381
+ni 377
+ga 376
+_b 367
+na 366
+e_ 365
+to 352
+pr 337
+_g 332
+_m 332
+û 328
+ß 326
+io 323
+ei 323
+Ã_ 321
+su 320
+ßi 320
+uo 319
+la 317
+er 303
+va 299
+vo 298
+ýi 293
+es 291
+- 290
+ó_ 286
+-_ 285
+_vi 284
+_- 283
+ko 283
+_-_ 283
+iau 281
+sa 278
+s. 278
+iu 276
+et 274
+nu 265
+ja 262
+õ_ 262
+_ta 262
+s._ 259
+el 258
+ój 258
+os_ 256
+im 255
+it 254
+_pr 253
+no 252
+av 251
+ur 251
+_÷ 248
+mi 246
+ve 245
+ak 245
+ku 243
+ek 241
+tu 241
+_ý 241
+ad 241
+ñ 238
+_l 238
+_i÷ 236
+us_ 236
+_su 232
+oj 231
+au_ 231
+ba 224
+nt 218
+me 214
+te 210
+jo_ 208
+ro 205
+ós 204
+iù 203
+tai 203
+_õ 201
+ló 201
+pas 200
+ip 198
+iai 197
+di 195
+usi 194
+vo_ 193
+kai 190
+i, 189
+i,_ 189
+sk 187
+_r 186
+aus 184
+ap 183
+gi 182
+mo 182
+uk 181
+t_ 180
+_da 177
+be 176
+ien 176
+o, 174
+il 174
+o,_ 173
+ti_ 173
+re 172
+_nu 171
+ßia 171
+_pas 169
+pri 169
+÷_ 169
+ok 168
+dó 166
+ól 162
+bu 161
+÷i 160
+on 160
+asi 159
+id 158
+ul 157
+ji 157
+aip 156
+ia_ 155
+_sa 154
+ot 154
+gal 153
+nk 153
+om 152
+p_ 151
+_pri 151
+od 151
+vis 150
+_at 150
+vie 150
+uv 150
+ab 150
+iù_ 149
+ama 149
+nó 149
+A 149
+sta 148
+kr 148
+_A 145
+ais 144
+rt 143
+J 142
+_J 142
+ójo 142
+_õ_ 141
+K 140
+_K 139
+pi 139
+avo 139
+ng 139
+mas 138
+du 138
+ug 137
+_ga 137
+ol 136
+tik 135
+ali 135
+.. 135
+_vis 135
+iek 134
+ini 133
+kad 133
+a, 133
+le 133
+kó 133
+T 133
+tó 132
+i. 132
+_T 131
+dý 131
+a,_ 131
+go 131
+ip_ 131
+aip_ 131
+ró 129
+a÷ 129
+dýi 129
+ke 128
+k_ 128
+d_ 127
+ad_ 127
+_ti 127
+_ma 126
+_va 125
+i._ 124
+z 123
+÷k 123
+iau_ 121
+m_ 120
+kar 119
+um 119
+tù 119
+_kad 118
+uý 117
+tr 117
+_kai 117
+as, 116
+og 116
+kad_ 116
+as,_ 116
+_ji 116
+rie 115
+yt 114
+_ja 114
+_ko 114
+vó 114
+ys 114
+jau 113
+ar_ 113
+pra 112
+aý 112
+ant 112
+kl 111
+tas 111
+gu 111
+_ku 110
+si_ 110
+pe 109
+eb 109
+N 109
+ójo_ 108
+_kad_ 108
+se 108
+_ki 108
+or 107
+ly 107
+lo 107
+iai_ 107
+_la 106
+ts 106
+ñs 106
+ig 105
+_N 105
+ut 105
+_u 105
+ós_ 105
+avo_ 104
+ie_ 104
+a. 104
+kt 104
+em 103
+do 103
+je 102
+ks 102
+ats 101
+_be 101
+ris 101
+l_ 101
+_vie 101
+tin 101
+ag 100
+dam 100
+_st 100
+a._ 99
+_bu 99
+? 99
+V 98
+io_ 98
+kas 98
+_jo 98
+vien 97
+_gal 97
+ót 97
+kia 97
+lia 96
+c 96
+uri 96
+_V 96
+?_ 96
+uvo 95
+ru 95
+ty 95
+ep 94
+nd 94
+lai 94
+_tai 94
+ju 93
+man 92
+o. 92
+buv 92
+tà 92
+_÷i 91
+i÷_ 91
+_i÷_ 91
+i÷k 90
+o._ 90
+_to 90
+bi 90
+up 89
+po 89
+rs 89
+_prie 88
+ù,_ 88
+aik 88
+ui 88
+_tik 88
+ij 88
+ù, 88
+pat 88
+prie 88
+I 87
+ies 87
+tai_ 87
+n_ 87
+pasi 87
+ius 87
+÷t 87
+eik 86
+_me 86
+ina 86
diff --git a/libtextcat/data/new_fingerprints/lm/luxembourgish.lm b/libtextcat/data/new_fingerprints/lm/luxembourgish.lm
new file mode 100644
index 000000000000..149c3d9d4359
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/luxembourgish.lm
@@ -0,0 +1,400 @@
+_
+e
+n
+r
+t
+i
+a
+s
+u
+er
+h
+d
+o
+l
+n_
+g
+en
+c
+ch
+_d
+m
+t_
+r_
+_a
+de
+e_
+en_
+er_
+an
+z
+é
+,
+un
+,_
+.
+ge
+b
+k
+w
+ë
+f
+._
+éi
+_de
+v
+p
+sc
+sch
+es
+nn
+ng
+te
+_v
+ne
+_g
+ue
+h_
+ie
+ch_
+m_
+ou
+i_
+ä
+s_
+_e
+ee
+el
+ze
+at
+re
+ss
+'
+vu
+_vu
+D
+le
+se
+st
+_an
+_s
+un_
+g_
+et
+_D
+_h
+he
+an_
+ec
+in
+rt
+_an_
+ro
+der
+ll
+is
+ht
+ech
+cht
+d'
+_d'
+S
+nt
+uer
+der_
+éi_
+em
+_o
+it
+L
+l_
+on
+esc
+A
+esch
+ti
+al
+us
+ier
+_z
+ra
+E
+we
+che
+_S
+ir
+ei
+ët
+äi
+vun
+_w
+me
+_m
+_A
+be
+ert
+vun_
+_vun
+_vun_
+nne
+tz
+ng_
+eb
+_der
+_E
+_der_
+ar
+_n
+rg
+n,_
+u_
+_L
+n,
+den
+eng
+um
+_b
+io
+ns
+erg
+au
+_en
+K
+M
+_ge
+dé
+as
+eg
+tze
+ung
+a_
+n.
+hu
+ner
+op
+n._
+ur
+et_
+B
+oun
+ën
+hi
+si
+bu
+nn_
+ëtz
+_dé
+déi
+_f
+inn
+de_
+_déi
+li
+tt
+ebu
+W
+ass
+uerg
+nd
+ebue
+bue
+rt_
+ëtze
+buer
+déi_
+ma
+ebuer
+_déi_
+_K
+_hu
+nge
+_M
+buerg
+ëtzeb
+zebue
+zeb
+zebu
+tzebu
+tzeb
+um_
+ioun
+ss_
+iou
+ges
+ere
+ha
+den_
+Lë
+éie
+F
+ke
+_a_
+t.
+ta
+Lëtz
+G
+Lëtze
+Lët
+t._
+en,_
+t,_
+en,
+t,
+at_
+ech_
+o_
+_de_
+ren
+ri
+ic
+ter
+_W
+_eng
+éier
+la
+ol
+rs
+scht
+ir_
+ru
+_B
+ert_
+_k
+oc
+P
+ten
+ht_
+_Lë
+nz
+em_
+p_
+ent
+wa
+cht_
+_den
+_Lët
+_Lëtz
+ich
+_G
+_den_
+tio
+il
+nner
+ger
+_as
+sse
+_op
+och
+ll_
+_ass
+R
+am
+-
+_se
+_F
+sche
+d_
+sch_
+ati
+_be
+ts
+ik
+nen
+De
+ers
+_.
+_P
+_._
+fe
+ass_
+mm
+gi
+aa
+zu
+_De
+ve
+pe
+fi
+tr
+lt
+en.
+_ass_
+eng_
+op_
+ck
+en._
+sen
+na
+rge
+ës
+kt
+ed
+_al
+äit
+so
+uro
+_op_
+gesc
+_R
+gesch
+Eur
+tiou
+Euro
+aus
+Eu
+erge
+hen
+tioun
+lec
+no
+fir_
+chen
+fir
+V
+hue
+_si
+or
+ut
+ac
+uf
+Z
+gen
+tu
+ver
+lech
+da
+mat
+_V
+_Eur
+_Eu
+wer
+lle
+_Euro
+uerge
+_ze
+éis
+J
+est
+ger_
+tt_
+_hue
+go
+_fi
+dee
+_fir_
+uet
+vum
+_zu
+ni
+_vum
+_fir
+een
+_vum_
+vum_
+huet
+ann
+_huet
+T
+I
+el_
+_wa
diff --git a/libtextcat/data/new_fingerprints/lm/malay.lm b/libtextcat/data/new_fingerprints/lm/malay.lm
new file mode 100644
index 000000000000..911d0cfef150
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/malay.lm
@@ -0,0 +1,400 @@
+_ 87128
+a 50232
+n 25424
+e 18746
+i 18605
+an 14419
+u 12470
+k 11955
+t 11875
+r 11007
+d 9856
+g 9545
+m 9390
+s 8926
+l 8631
+n_ 8004
+an_ 7095
+p 6890
+a_ 6739
+b 6645
+ng 6630
+h 5964
+da 5153
+_d 5107
+er 4625
+ka 4448
+la 4339
+y 4323
+i_ 4295
+en 4159
+ya 3800
+ang 3778
+_m 3750
+o 3593
+. 3539
+._ 3310
+at 3288
+ah 3216
+_b 3199
+_s 3177
+ta 3076
+ra 3036
+_k 2949
+g_ 2939
+ng_ 2933
+ar 2915
+_p 2906
+me 2884
+ga 2781
+di 2778
+ak 2727
+al 2705
+_me 2671
+ang_ 2524
+h_ 2509
+ba 2508
+pa 2454
+kan 2449
+in 2437
+tu 2411
+_t 2376
+sa 2338
+_da 2276
+j 2276
+pe 2225
+, 2150
+k_ 2147
+ma 2143
+se 2138
+am 2131
+kan_ 2117
+_di 2082
+,_ 2079
+ke 2048
+un 2004
+be 1947
+_a 1925
+na 1871
+ti 1868
+ri 1861
+u_ 1857
+as 1854
+ny 1827
+ha 1789
+te 1788
+_pe 1768
+em 1750
+it 1737
+_i 1732
+_ke 1711
+yan 1706
+ad 1698
+ia 1673
+yang 1673
+_y 1668
+_ya 1655
+yang_ 1653
+_se 1648
+ah_ 1646
+_yan 1639
+_yang 1639
+ala 1612
+nya 1587
+el 1576
+ik 1571
+t_ 1568
+ai 1549
+men 1531
+eng 1522
+_men 1464
+nga 1441
+dan 1366
+_be 1365
+si 1343
+uk 1328
+ada 1299
+nt 1291
+__ 1287
+ap 1276
+ua 1265
+___ 1238
+- 1213
+ja 1211
+ber 1204
+gan 1203
+_ba 1193
+____ 1189
+ni 1181
+_te 1169
+c 1143
+ran 1141
+_____ 1140
+m_ 1127
+ara 1118
+per 1099
+le 1084
+_dan 1083
+dan_ 1079
+ngan 1060
+_dan_ 1050
+ya_ 1046
+at_ 1044
+da_ 1021
+li 1016
+aka 1013
+A 999
+r_ 999
+w 997
+eb 995
+lah 980
+ata 980
+ak_ 978
+nd 974
+_ber 955
+gi 936
+is 933
+il 931
+tu_ 923
+s_ 920
+gan_ 915
+mb 913
+wa 904
+ag 903
+ngan_ 898
+ter 887
+nya_ 877
+S 873
+ek 853
+ru 852
+_l 838
+ela 828
+itu 824
+ol 822
+aha 822
+ada_ 820
+pu 812
+di_ 807
+bu 807
+am_ 804
+ur 801
+tan 790
+mp 790
+_per 786
+_sa 784
+M 782
+ut 781
+us 779
+era 779
+lam 778
+lah_ 775
+asa 767
+ki 761
+ir 759
+de 756
+enga 750
+su 748
+du 741
+id 739
+" 733
+akan 732
+apa 728
+_S 724
+ul 721
+lu 717
+ari 717
+dal 704
+et 698
+es 698
+pad 688
+_ma 688
+_M 685
+ana 684
+bi 679
+pada 673
+dala 673
+l_ 671
+ep 664
+f 662
+_di_ 658
+B 655
+ing 655
+_j 654
+ika 653
+ku 650
+_. 644
+akan_ 642
+ama 637
+pen 636
+alam 634
+eh 634
+pada_ 633
+ai_ 632
+_ter 632
+K 631
+mu 628
+ju 628
+P 626
+mem 625
+au 622
+_mem 614
+lan 612
+_._ 611
+ntu 608
+lam_ 605
+um 601
+on 600
+gk 597
+_in 597
+ngk 597
+a. 584
+meng 582
+_meng 578
+alam_ 577
+_A 576
+aa 575
+uk_ 572
+_pen 569
+ban 569
+or 569
+st 566
+ay 566
+dar 565
+_pa 564
+a._ 564
+_h 562
+bah 562
+_P 560
+D 559
+ri_ 558
+ini 552
+_de 551
+rt 550
+aan 545
+_it 542
+_itu 542
+nda 540
+eri 540
+dalam 537
+_B 533
+_dal 532
+ip 532
+_dala 532
+ta_ 528
+_u 527
+ung 525
+ih 524
+aw 520
+_n 519
+atu 517
+ila 513
+mi 513
+leh 513
+ian 512
+tuk 509
+awa 508
+gu 506
+ert 506
+engan 505
+ole 504
+_K 501
+seb 497
+ca 496
+gg 493
+_ta 489
+ra_ 488
+ngg 488
+itu_ 487
+emb 482
+ni_ 482
+ida 482
+nj 482
+_ti 479
+man 478
+den 477
+_D 474
+_ka 473
+aj 470
+oleh 468
+n. 468
+n._ 464
+ngka 464
+gka 464
+dak 464
+anga 461
+ena 459
+san 458
+pat 458
+rk 458
+( 455
+ent 454
+agi 453
+) 451
+ia_ 450
+ge 450
+ab 449
+im 447
+_ini 446
+ntuk 445
+I 445
+ar_ 440
+N 439
+aan_ 436
+_la 433
+pi 432
+baha 431
+deng 430
+han 430
+bag 429
+eh_ 429
+hu 429
+denga 428
+_o 427
+na_ 427
+T 425
+leh_ 422
+_den 422
+ka_ 419
+any 415
+ud 415
+rang 414
+anya 413
+gi_ 412
+angan 412
+a, 411
+_deng 410
+ita 409
+kat 408
+re 408
+_( 406
+tuk_ 403
+aga 401
+ne 400
+and 399
+aya 398
+_dar 397
+a,_ 396
+ro 396
+ntuk_ 393
+eba 392
+aran 390
+_" 387
+ed 385
+end 384
+ko 383
+sa_ 381
+p_ 381
+ara_ 380
+_seb 379
+alah 379
+oleh_ 379
+an. 378
+dak_ 378
+eg 378
+hi 376
+dari 375
+an._ 375
+au_ 373
+bo 373
+ti_ 371
+ula 371
diff --git a/libtextcat/data/new_fingerprints/lm/manx_gaelic.lm b/libtextcat/data/new_fingerprints/lm/manx_gaelic.lm
new file mode 100644
index 000000000000..e6eceebeb080
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/manx_gaelic.lm
@@ -0,0 +1,400 @@
+_ 36004
+e 9455
+a 8302
+y 6395
+n 6395
+h 5736
+r 4939
+s 4799
+o 4429
+i 4129
+l 3462
+y_ 2896
+g 2731
+n_ 2549
+d 2232
+t 2160
+_a 1836
+m 1823
+sh 1774
+e_ 1746
+h_ 1645
+yn 1550
+c 1531
+ee 1515
+gh 1461
+s_ 1460
+_s 1388
+ey 1345
+_e 1316
+ag 1312
+a_ 1201
+r_ 1173
+agh 1165
+in 1148
+as 1136
+_d 1136
+u 1124
+he 1060
+yn_ 1025
+oo 1012
+ey_ 1005
+_m 1002
+er 965
+v 949
+_y 941
+_v 895
+_c 891
+_as 880
+' 869
+, 866
+_n 832
+ll 828
+,_ 827
+_sh 818
+. 802
+hi 797
+as_ 770
+ee_ 768
+ne 767
+_as_ 764
+b 762
+re 735
+ay 730
+._ 730
+an 726
+ar 725
+gh_ 722
+ny 709
+en 703
+_r 697
+ch 688
+agh_ 687
+dy 686
+t_ 677
+le 667
+k 658
+er_ 616
+oi 612
+ea 607
+_t 601
+yr 596
+_er 585
+ra 574
+_dy 572
+in_ 570
+l_ 564
+f 557
+_l 556
+ha 551
+_g 548
+_ny 534
+nn 530
+" 528
+_ch 527
+_y_ 524
+ie 514
+dy_ 514
+_dy_ 513
+aa 510
+_f 509
+j 504
+sh_ 487
+oa 480
+is 478
+_h 470
+rr 468
+ny_ 467
+_ny_ 463
+_er_ 454
+ish 445
+ho 442
+ai 441
+d_ 435
+ro 423
+ht 418
+ei 417
+shi 416
+il 409
+me 408
+_ay 403
+_b 403
+la 400
+_j 400
+my 394
+va 391
+ns 386
+on 385
+_o 381
+ys 380
+_shi 379
+ia 377
+ayn 373
+_va 371
+hen 362
+she 356
+ri 345
+lle 342
+ooi 342
+mee 340
+ley 335
+_me 331
+el 330
+rt 328
+ie_ 327
+eh 324
+w 316
+_ayn 313
+al 311
+g_ 309
+ish_ 308
+lley 307
+mee_ 305
+_mee 304
+ill 301
+es 299
+na 299
+je 298
+yns 296
+C 294
+_my 291
+_she 290
+ley_ 282
+V 280
+_yn 278
+_" 278
+_mee_ 277
+ta 272
+_V 271
+ys_ 268
+- 268
+lley_ 265
+hin 264
+_ro 259
+shin 256
+_yn_ 255
+_je 255
+do 253
+va_ 253
+ne_ 253
+_va_ 252
+ns_ 252
+_shin 251
+yns_ 250
+ayns 248
+en_ 247
+che 246
+_ayns 246
+eh_ 246
+_do 242
+ad 241
+ney 240
+o_ 240
+ym 240
+ed 239
+yr_ 239
+ayns_ 237
+ur 237
+st 234
+_C 234
+rt_ 234
+'n 232
+m_ 232
+p 231
+li 231
+or 230
+ow 228
+hin_ 225
+da 225
+shen 223
+"_ 223
+'n_ 223
+Va 221
+ght 220
+shin_ 219
+tr 217
+_Va 217
+ry 216
+ve 216
+_shen 215
+ty 214
+mo 206
+_' 205
+_ve 205
+ma 203
+be 203
+te 203
+hie 203
+hey 203
+nag 202
+ll_ 201
+yl 200
+w_ 200
+ss 200
+aa_ 198
+nagh 198
+an_ 197
+io 195
+ow_ 194
+it 194
+sy 193
+ayr 193
+ney_ 192
+E 192
+sht 192
+ni 191
+_k 190
+ha_ 190
+ain 189
+u_ 189
+hy 189
+aght 188
+oo_ 188
+ree 188
+lh 187
+_tr 186
+esh 186
+_che 183
+yrt 182
+_da 182
+oar 182
+doo 181
+k_ 181
+se 180
+au 180
+ille 179
+ar_ 179
+_lh 179
+ki 177
+arr 176
+ec 176
+ol 175
+_doo 175
+T 175
+row 175
+_row 174
+ge 173
+so 172
+oy 171
+oil 170
+_re 170
+_ag 170
+'e 169
+rey 169
+illey 169
+ck 168
+ad_ 168
+ann 168
+n, 166
+eea 166
+_ta 166
+ht_ 165
+ae 162
+_row_ 162
+! 162
+row_ 162
+ane 161
+fe 161
+dd 160
+go 159
+tyn 159
+oin 158
+ooa 158
+n. 158
+eg 156
+_ec 156
+_ma 156
+_agh 155
+n,_ 154
+_fe 154
+Ch 154
+nyn 153
+fo 152
+eay 152
+nagh_ 152
+n._ 152
+_go 151
+S 150
+ke 150
+hey_ 150
+enn 150
+cha 149
+rre 149
+_fo 149
+ghe 149
+raa 149
+G 148
+lan 148
+mm 147
+ym_ 147
+A 146
+c_ 146
+oill 145
+hee 144
+ooar 144
+_E 144
+nne 143
+tey 142
+ir 141
+de 141
+hyn 140
+_he 140
+nyn_ 140
+'s 139
+_gh 139
+_cha 138
+L 138
+yrt_ 138
+_Ch 137
+e, 137
+lla 136
+Va_ 136
+ooin 136
+ell 135
+a' 135
+os 135
+_oo 134
+am 134
+rish 133
+th 133
+_mo 133
+ris 133
+iag 133
+gg 133
+_Va_ 132
+iagh 132
+_G 132
+e,_ 132
+ass 132
+!_ 132
+my_ 132
+hoo 131
+_T 131
+nee 130
+a'n_ 129
+rag 129
+a'n 129
+_'s 129
+_so 128
+co 128
+rey_ 128
+_eh 126
+_my_ 126
+nni 126
+ou 126
+_'sy 125
+'sy 125
+_p 125
+vo 125
+_S 125
+H 125
+_agh_ 125
+tra 124
+moo 124
+hu 123
+ooy 123
+ragh 123
+al_ 123
+it_ 123
+hia 122
+id 122
diff --git a/libtextcat/data/new_fingerprints/lm/marathi.lm b/libtextcat/data/new_fingerprints/lm/marathi.lm
new file mode 100644
index 000000000000..479f4fd9b760
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/marathi.lm
@@ -0,0 +1,400 @@
+þ 17815
+_ 14534
+þþ 4035
+þ_ 3087
+· 2180
+ø 1762
+·þ 1669
+ú 1654
+¡ 1534
+¥ 1480
+¡þ 1433
+Å 1342
+£ 1289
+ˆ 1237
+› 1163
+­ 1158
+¬ 1153
+þþ_ 1133
+¿ 1099
+¨ 1084
+Ÿ 1062
+ 1059
+ú_ 1054
+¥þ 1054
+ˆÅ 1050
+¡þþ 1039
+þú 989
+œ 983
+›þ 968
+‚ 965
+¹ 949
+_‚ 936
+. 936
+¨þ 915
+þ¿ 859
+þ£ 842
+¬þ 838
+Ÿþ 836
+þ· 769
+þ 764
+µ 710
+þ¥ 710
+œþ 684
+þ·þ 681
+._ 670
+þú_ 637
+½ 609
+‚þ 578
+_Ÿ 578
+_‚þ 569
+_ˆ 543
+þ 530
+_· 530
+µþ 524
+_œ 523
+Š 511
+ 504
+ø_ 495
+·þ_ 477
+þ¥þ 476
+þ­ 474
+_¹ 474
+_¬ 457
+_ˆÅ 447
+_Ÿþ 447
+¡þþ_ 446
+þø 444
+¸ 438
+Šþ 423
+¥þþ 422
+þ 409
+þ¨ 388
+Åþ 387
+þþ 381
+£_ 366
+ˆÅþ 359
+é 339
+þ¨þ 334
+_¬þ 332
+þµ 331
+þ› 324
+›þþ 320
+_œþ 320
+Ÿþþ 318
+ª 315
+_­ 315
+›þ_ 314
+Û 313
+þ¹ 306
+þ£_ 303
+þ¬ 301
+þˆ 297
+ 297
+þþ 295
+ªþ 284
+þ¡ 280
+§ 274
+¿_ 272
+þþ· 270
+þ¡þ 268
+¥þþ_ 268
+£þ 264
+þ 264
+­ú 263
+, 258
+þþ·þ 256
+þ¥þþ 256
+þ·þ_ 256
+,_ 255
+þˆÅ 255
+Ù 249
+¬þþ 247
+þþ¿ 243
+þþ£ 241
+_› 234
+_·þ 233
+·þþ 232
+þœ 232
+·¡ 231
+þ¿_ 231
+þ›þ 230
+þ¬þ 228
+Ä 227
+š 226
+þµþ 225
+½_ 223
+·¡þ 221
+­½ 220
+_ 220
+¥ø 219
+µþ_ 218
+¨þþ 217
+þ. 216
+þŸ 215
+_›þ 213
+.. 209
+_Ÿþþ 209
+þþþ 205
+þþ 205
+·¡þþ 204
+‰ 204
+·þú 203
+þ¥þþ_ 202
+Å_ 202
+þþ¥ 201
+ê 198
+_¨ 198
+þŠ 193
+_¡ 191
+þœþ 190
+þ._ 189
+¡þ 188
+¡ 188
+¡þþ 187
+Ï 186
+ø· 186
+ž 185
+­ú_ 180
+þ_ 180
+_þ 180
+‰þ 179
+Ú 178
+_ˆÅþ 176
+_·¡þ 175
+_·¡þþ 175
+­þ 175
+_·¡ 175
+þŠþ 174
+þ­ú 171
+¥¡ 170
+_ 170
+... 168
+¥¡þ 167
+þú 166
+_¥ 164
+þŸþ 164
+‚þ­ 163
+¥¡þþ 163
+žþ 162
+ø·þ 162
+_‚þ­ 162
+¡þþ_ 161
+­ø 160
+ˆÅ_ 159
+þþ¥þ 159
+þ­½ 157
+œþþ 154
+þ 154
+_þ 154
+_¸ 154
+ø¥ 153
+þþ 153
+šþ 151
+þ£þ 151
+_Š 151
+‚þ­½ 150
+þ§ 150
+·ø 150
+ø. 149
+_‚þ­½ 149
+‚¬ 148
+¥þú 148
+£ú 147
+œÏ 147
+Å£ 146
+þú_ 146
+_‚¬ 145
+ø._ 145
+þþ¨ 144
+þÄ 143
+‹ 142
+ 140
+¡þþ 140
+¹¨þ 139
+¹¨ 139
+þþ­ 137
+þþ› 136
+_­ø 136
+Ÿ­ 136
+_¡þ 135
+ú. 135
+·þø 135
+' 135
+ˆÅ£ 134
+_¥þ 132
+þþ¨þ 131
+þþ·þ_ 131
+µþþ 129
+'_ 129
+_›þþ 129
+ú._ 128
+þÛ 127
+.... 127
+˜ 127
+¿ 126
+þþ¬ 125
+_¨þ 124
+¡þ_ 123
+þ·þþ 123
+« 122
+; 122
+_œÏ 121
+_¡þþ 119
+» 118
+¬þ¿ 115
+¥þ¿ 114
+¥þ_ 114
+þþŠ 113
+þ¥¡þ 113
+þ¥¡ 113
+b 113
+þ£ú 112
+_' 112
+s 111
+& 111
+þþ 111
+‚¬þ 111
+þþú 111
+p 111
+ê› 111
+_£ 110
+þ¥¡þþ 110
+bs 109
+ê›þ 109
+n 109
+º 109
+_‚¬þ 108
+&n 107
+bsp; 107
+nbsp; 107
+sp; 107
+nb 107
+bsp 107
+nbs 107
+sp 107
+nbsp 107
+&nb 107
+&nbsp 107
+p; 107
+&nbs 107
+þþþ 106
+_'_ 106
+þ¥ø 106
+·þ£ 106
+ø¥þ 106
+þþ_ 105
+¨þ£ 104
+þ¿ 104
+˜þ 104
+þ¸ 104
+_Šþ 103
+þ¬þþ 103
+›ø 103
+­ø· 102
+þþŠþ 101
+þþ­ú 100
+þ›þþ 99
+þ, 99
+_ 98
+þ,_ 98
+_ˆÅ£ 98
+‚þœ 98
+þþú_ 98
+_þþ 97
+;& 97
+_‚þœ 97
+‚þœþ 97
+_‚þœþ 96
+þ 96
+¡þþþ 96
+¬þ· 96
+‡ 96
+;&nb 95
+p;&nb 95
+;&n 95
+p;& 95
+;&nbs 95
+bsp;& 95
+..... 95
+ê›þ_ 95
+sp;&n 95
+sp;& 95
+p;&n 95
+° 94
+_‡ 94
+ƒ 94
+¥¡þþ_ 94
+þþ¬þ 93
+Ÿþú 93
+þµþ_ 93
+£ú_ 93
+œþµ 93
+_ž 92
+µ¡ 92
+_­ø· 92
+þ­ú_ 92
+µ¡þ 92
+·þú_ 91
+¹­ 91
+­½_ 90
+_¬þþ 90
+_œþþ 90
+þþ¥þþ 90
+„ 90
+µ¡þþ 90
+‹þ 89
+¹· 89
+þ¡þ 89
+þ£ú_ 89
+þÚ 89
+þ¡ 89
+þˆÅþ 89
+¿Š 89
+Šþþ 89
+_Ÿ­ 88
+½Å 88
+þ¡þþ 88
+_¹¨ 88
+_¹¨þ 88
+­ø·þ 87
+ø 87
+ˆ½ 87
+›þþ­ú 87
+_‹ 87
+­µ 87
+›þþ­ 87
+œþµþ 86
+¥þú_ 86
+_‚þ¹ 85
+‚þ¹ 85
+£· 85
+_„ 85
+¹·þ 85
+_›þþ­ 84
+¡ø 84
+þ¹µþ 83
+þþµ 83
+¹µþ 83
+¹µ 83
+þ¹µ 83
+þþ_ 83
+_ª 82
+_‚þ¹µ 82
+þ¹µþ_ 82
+›þþ_ 82
+‚þ¹µþ 82
+‚þ¹µ 82
+Ÿþú_ 82
+_þ 82
+¹µþ_ 82
+þþ£_ 81
+øˆ 81
+½._ 80
+¿Šþ 80
+_·þ£ 80
+½. 80
+ú 79
+_žþ 79
+œþµþ_ 79
+² 79
diff --git a/libtextcat/data/new_fingerprints/lm/middle_frisian.lm b/libtextcat/data/new_fingerprints/lm/middle_frisian.lm
new file mode 100644
index 000000000000..17e4f149d122
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/middle_frisian.lm
@@ -0,0 +1,400 @@
+_ 60524
+e 20138
+n 10664
+t 8674
+i 7833
+a 7549
+o 7278
+r 6963
+n_ 5391
+s 5087
+d 4731
+t_ 4327
+l 4306
+e_ 4271
+en 4068
+k 3483
+m 3346
+g 2990
+er 2965
+en_ 2835
+y 2597
+w 2481
+h 2475
+j 2441
+_d 2375
+r_ 2250
+u 2059
+s_ 1921
+ie 1780
+_h 1725
+z 1710
+de 1666
+_m 1652
+_w 1629
+_i 1611
+et 1550
+te 1540
+b 1538
+. 1517
+er_ 1481
+in 1420
+an 1408
+p 1392
+f 1311
+, 1280
+,_ 1273
+k_ 1266
+._ 1258
+y_ 1238
+_z 1230
+oe 1162
+v 1144
+et_ 1138
+ne 1131
+ee 1125
+st 1120
+_o 1107
+_s 1107
+_e 1076
+_n 1068
+_b 989
+_t 988
+l_ 983
+oo 981
+ge 975
+je 963
+_g 952
+at 949
+me 946
+ij 943
+is 913
+he 899
+' 892
+aa 882
+el 870
+_v 864
+re 843
+ar 837
+_de 805
+on 800
+ke 796
+de_ 788
+ei 782
+_a 776
+_' 770
+le 744
+at_ 742
+it 741
+_k 738
+or 734
+an_ 729
+in_ 723
+da 715
+te_ 703
+_he 680
+_l 664
+H 661
+al 652
+_H 652
+a_ 650
+_da 648
+D 648
+d_ 646
+_D 643
+es 617
+g_ 608
+is_ 606
+æ 596
+_f 594
+'t 591
+_'t 584
+'t_ 583
+_me 580
+c 576
+_'t_ 576
+ri 571
+_en 564
+yn 563
+_en_ 559
+ze 538
+m_ 535
+om 535
+ik 522
+_de_ 522
+ed 521
+be 517
+wi 509
+ch 509
+ol 498
+ar_ 498
+ha 496
+_ne 495
+we 494
+ou 486
+nd 474
+ma 474
+ik_ 470
+J 459
+_ik 456
+_ik_ 451
+_J 451
+ll 450
+M 446
+li 443
+wa 442
+_M 442
+ien 435
+ro 432
+di 421
+nn 418
+ste 415
+wo 415
+yn_ 415
+_r 413
+_ha 411
+it_ 409
+tt 408
+ve 407
+S 404
+_j 399
+_S 398
+_in 395
+as 395
+der 395
+hi 389
+_al 388
+uw 384
+tte 384
+ng 381
+_wi 381
+nne 381
+het 380
+_het 378
+lle 376
+nt 374
+ns 373
+op 373
+je_ 367
+E 366
+ek 363
+B 363
+dat 362
+_B 362
+_ma 362
+_in_ 361
+_dat 359
+_wo 359
+ier 358
+og 357
+_E 354
+_ge 352
+dat_ 349
+ne_ 349
+eer 349
+het_ 348
+_het_ 348
+_is 348
+_dat_ 348
+ey 347
+W 346
+_W 343
+_te 339
+eg 337
+ra 335
+rs 335
+! 332
+zi 332
+gt 329
+_He 327
+He 327
+_be 324
+j_ 320
+ij_ 320
+u_ 319
+f_ 317
+_di 316
+; 315
+_we 315
+_is_ 314
+rt 313
+!_ 313
+;_ 312
+n. 311
+p_ 310
+la 310
+_wa 310
+ea 308
+_u 308
+pe 304
+ta 304
+il 301
+my 300
+ig 300
+n, 300
+n,_ 298
+iet 297
+sc 297
+ter 296
+sch 296
+oor 295
+no 294
+_my 293
+n._ 291
+ti 290
+_zi 289
+st_ 288
+el_ 283
+gen 282
+se 280
+K 279
+_oo 278
+ui 278
+e, 275
+ten 274
+net 274
+oon 274
+ho 274
+jn 273
+e,_ 273
+ijn 272
+ien_ 272
+ko 272
+N 271
+ni 271
+_N 269
+den 269
+za 268
+_net 267
+i_ 266
+wol 266
+een 264
+va 264
+am 264
+do 263
+ol_ 262
+le_ 261
+_te_ 260
+rd 260
+ke_ 259
+ey_ 259
+ers 258
+_K 255
+_ie 255
+ver 254
+to 254
+_hi 253
+nde 253
+: 253
+der_ 252
+jo 251
+net_ 251
+al_ 250
+_wol 250
+_p 250
+_no 250
+aar 248
+_za 245
+_net_ 245
+nk 242
+est 241
+om_ 241
+_va 241
+ak 241
+tj 239
+ae 238
+_op 237
+mo 236
+tr 236
+_st 233
+ier_ 233
+_ve 233
+mm 232
+T 231
+eu 229
+_ze 228
+:_ 228
+mi 228
+ag 227
+zo 227
+_ko 226
+vo 225
+_T 225
+nne_ 225
+F 224
+.. 224
+fo 224
+_F 223
+en, 223
+oe_ 222
+wie 222
+en,_ 221
+kk 221
+_ien 220
+on_ 218
+wol_ 218
+_fo 218
+I 217
+_do 216
+eit 214
+ei_ 214
+mme 214
+G 213
+_I 213
+as_ 213
+or_ 211
+A 211
+_G 210
+_A 210
+lle_ 209
+Da 208
+_Da 207
+_wol_ 207
+ren 206
+_ee 206
+e. 205
+van 205
+jen 205
+een_ 205
+_van 204
+kke 204
+en. 204
+O 204
+_vo 204
+V 203
+_O 203
+_V 203
+ken 203
+_mo 203
+ot 202
+ie_ 201
+ou_ 201
+ur 201
+tte_ 200
+em 200
+_van_ 199
+L 199
+van_ 199
+_zo 199
+op_ 199
+_L 197
+iet_ 196
+of 194
+ten_ 194
+oer 193
+_ien_ 193
+e._ 193
+oed 192
+hie 192
+zy 191
+en._ 191
+De 191
+_ver 191
+_ho 189
+_De 189
+oa 189
+den_ 188
+_zy 188
+lo 187
+dan 187
+nt_ 186
diff --git a/libtextcat/data/new_fingerprints/lm/mingo.lm b/libtextcat/data/new_fingerprints/lm/mingo.lm
new file mode 100644
index 000000000000..4d1947a33345
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/mingo.lm
@@ -0,0 +1,400 @@
+_ 3156
+' 991
+a 829
+t 788
+k 788
+n 752
+e 640
+h 602
+ë 551
+s 495
+w 466
+ö 422
+y 403
+u 397
+'_ 368
+_n 330
+i 269
+. 269
+._ 240
+e' 238
+ô 219
+a' 209
+ne 209
+e_ 194
+_ne 184
+wa 172
+_k 164
+kw 162
+á 154
+e'_ 146
+_t 145
+'t 141
+" 141
+ë' 134
+ê 132
+_h 131
+é 129
+ak 129
+ne_ 126
+ta 123
+_ne_ 123
+at 121
+u' 121
+hu 118
+_u 108
+ka 107
+í 106
+nö 105
+N 103
+ö' 100
+hs 95
+ha 95
+te 94
+ya 93
+_ë 92
+kh 90
+'k 86
+ú 85
+æ 84
+ni 84
+'. 84
+wë 83
+ny 83
+sh 80
+_N 80
+'._ 76
+ö_ 76
+kë 72
+_" 71
+ën 70
+th 69
+yu 66
+_w 66
+ëh 66
+t_ 66
+ô_ 66
+Ne 65
+Ne' 65
+'s 64
+ne' 64
+"_ 64
+_. 63
+_._ 61
+Ne'_ 61
+, 60
+ne'_ 58
+në 58
+kwa 57
+_ne' 57
+öt 57
+a't 57
+ek 56
+s_ 56
+ët 55
+i' 55
+_hu 54
+T 54
+ë_ 54
+_s 54
+_Ne 53
+të 53
+tö 53
+_Ne' 53
+_ne'_ 53
+ht 53
+- 53
+ts 52
+ya' 52
+ë'_ 51
+_wa 51
+_Ne'_ 51
+'ö 50
+íy 50
+_ka 50
+as 50
+ní 49
+un 49
+ê_ 49
+ty 48
+hu_ 48
+ke 48
+u_ 48
+,_ 47
+yô 46
+he 46
+ye 46
+kê 45
+si 45
+nô 44
+khu 44
+a'k 44
+_a 43
+ák 43
+wat 42
+'ë 42
+nö' 42
+wá 42
+aw 41
+an 41
+we 41
+ôt 41
+i_ 41
+_kh 41
+tak 41
+_te 41
+ik 41
+_khu 40
+ës 40
+yö 40
+k_ 39
+khu_ 39
+yu' 38
+hö 38
+wën 38
+n- 38
+ëhs 38
+_ha 38
+wa' 38
+_n- 37
+sa 37
+? 37
+_T 37
+æ' 37
+_un 36
+hô 36
+wé 36
+ah 36
+_khu_ 35
+iy 35
+ëk 35
+ut 35
+ök 35
+öh 35
+te' 35
+u'_ 35
+_kë 34
+yô_ 34
+ha' 34
+st 34
+ti 34
+ta' 34
+u't 33
+ya't 33
+'t_ 33
+'ö_ 33
+akw 33
+ôk 33
+níy 32
+'ta 32
+ku 32
+ui 32
+_sh 32
+aa 31
+én 31
+ay 31
+ënö 31
+â 30
+se 30
+tw 30
+yë 30
+(_ 29
+_(_ 29
+_)_ 29
+us 29
+_ëhs 29
+( 29
+kö 29
+_ëh 29
+ae 29
+hë 29
+_) 29
+)_ 29
+_( 29
+sy 29
+) 29
+ëë 28
+ôn 28
+sk 28
+tá 28
+té 28
+tk 28
+ên 28
+kê_ 28
+a_ 28
+án 27
+_huik 27
+ön 27
+_hui 27
+kwé 27
+huik 27
+ék 27
+hui 27
+uik 27
+_na 27
+na 27
+a'_ 26
+uikê 26
+_ni 26
+." 26
+ææ 26
+wi 26
+huikê 26
+ikê 26
+nya 25
+."_ 25
+?" 25
+të' 25
+íyu 25
+awë 25
+sn 25
+në_ 24
+ö'_ 24
+ikê_ 24
+ekh 24
+'ke 24
+uikê_ 24
+?"_ 24
+tek 24
+êt 24
+', 24
+"N 23
+_ta 23
+'a 23
+hk 23
+tê 23
+tsi 23
+wö 23
+yu'_ 23
+_"N 23
+íyu' 23
+níyu 23
+a'ta 23
+é' 23
+níyu' 23
+shô 22
+wëní 22
+kwa' 22
+ëní 22
+kwë 22
+_u' 22
+ey 22
+ëníyu 22
+K 22
+ai 22
+_shô 22
+úw 22
+aö 22
+ëht 22
+ëníy 22
+kwe 22
+wëníy 22
+_he 22
+_te' 21
+ún 21
+s. 21
+nöh 21
+ëö 21
+_ëk 21
+'sh 21
+kë' 21
+htö 21
+H 21
+ata 21
+U 20
+'h 20
+_ha' 20
+ææ' 20
+ae_ 20
+a'ke 20
+ény 20
+unö 20
+we' 20
+wë' 20
+i'_ 20
+kës 20
+ya'ta 19
+es 19
+awëní 19
+hô_ 19
+uk 19
+awën 19
+hw 19
+ë'. 19
+ô' 19
+tö' 19
+sé 19
+ë'ë 19
+ë'._ 19
+'ëë 19
+hkw 19
+-a 19
+twa 19
+'kw 18
+he_ 18
+_K 18
+nöt 18
+hsa 18
+hsi 18
+ôni 18
+aya 18
+is 18
+ëë' 18
+kæ 18
+skw 18
+uw 18
+',_ 18
+e't 18
+kææ 18
+ka' 18
+nae 17
+shô_ 17
+sat 17
+niy 17
+ëts 17
+ath 17
+ye' 17
+í_ 17
+'ëë' 17
+'u 17
+_ët 17
+ës_ 17
+_kës 17
+_y 17
+kak 17
+ö'ö 17
+kat 17
+_th 17
+'ë_ 17
+út 17
+u'k 17
+_H 17
+tekh 17
+s._ 17
+wê 17
+_nae 17
+kwá 17
+_shô_ 17
+yê 17
+öte 16
+sha 16
+ke_ 16
+k. 16
+si' 16
+hö' 16
+yá 16
+hta 16
+kn 16
+_nae_ 16
+hse 16
+ké 16
+ë'ë_ 16
+unë 16
+nya' 16
+nae_ 16
+nê 16
+á' 16
+ás 16
+tô 16
+a's 16
+a'u 16
diff --git a/libtextcat/data/new_fingerprints/lm/mongolian_cyrillic.lm b/libtextcat/data/new_fingerprints/lm/mongolian_cyrillic.lm
new file mode 100644
index 000000000000..7ac690c8ac2e
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/mongolian_cyrillic.lm
@@ -0,0 +1,363 @@
+_ 77671
+о 60289
+г 57097
+Ó© 51540
+л 41421
+а 40332
+Ñ€ 35484
+Ñ… 31764
+н 31695
+д 30802
+Ñ 29381
+и 28336
+Ò¯ 26898
+й 25901
+у 21671
+Ñ‚ 21639
+Ñ 21118
+ч 19759
+оо 12185
+м 12159
+Ó©Ó© 11964
+ц 11468
+гү 10443
+үй 10090
+чи 9604
+н_ 9584
+гүй 9529
+в 9133
+_Ñ… 8958
+й_ 8712
+ор 8462
+з 8134
+г_ 8106
+ий 8078
+Ó©Ñ€ 7638
+б 7524
+ол 7372
+ÑÑ 6997
+_Ñ‚ 6942
+уу 6908
+их 6787
+йг 6584
+ог 6380
+го 6337
+чих 6301
+өл 6221
+_о 6219
+д_ 6092
+_Ó© 6085
+Ò¯Ò¯ 5978
+Ñ€_ 5841
+гө 5674
+ш 5656
+өг 5647
+_б 5547
+аа 5471
+он 5259
+_з 5115
+Ð³Ñ 5047
+нг 5025
+Ñ…Ó© 4840
+хо 4806
+ж 4794
+ар 4704
+л_ 4687
+до 4578
+ал 4489
+йн 4345
+е 4326
+өн 4232
+Ñ_ 4191
+дө 4122
+лг 4089
+то 4085
+ан 4062
+чх 4039
+а_ 3970
+о_ 3935
+үйг 3903
+ул 3888
+гүйг 3887
+га 3756
+_Ñ 3746
+_ц 3600
+ÑÑ_ 3567
+гÑÑ 3545
+аг 3529
+гч 3515
+Ñо 3502
+Ñ‹ 3455
+цг 3441
+Ó©_ 3396
+лд 3389
+ха 3367
+ла 3290
+оо_ 3290
+ло 3281
+ги 3278
+Ñ_ 3252
+Ñ…_ 3249
+ийн 3227
+ÑÑ€ 3212
+ро 3204
+_д 3176
+_м 3157
+Ó©Ó©_ 3138
+рч 3048
+Ð¹Ð³Ñ 3033
+йн_ 3031
+йгÑÑ 3027
+ÑÓ© 3006
+үл 2995
+Ò¯Ð¹Ð³Ñ 2931
+үйгÑÑ 2931
+Ð³Ò¯Ð¹Ð³Ñ 2929
+та 2922
+үй_ 2876
+йг_ 2861
+_г 2851
+гүй_ 2831
+Ñ‚Ñ 2816
+ай 2795
+гоо 2759
+да 2724
+уул 2723
+рд 2687
+Ð¾Ñ 2650
+аа_ 2642
+гөө 2602
+рг 2598
+Ñг 2567
+Ñй 2566
+гий 2547
+Ñл 2546
+_ш 2543
+дг 2513
+Ñ€Ó© 2478
+ли 2469
+лө 2462
+_а 2461
+ох 2455
+Ñй_ 2448
+к 2440
+ийг 2425
+Ñ‚Ó© 2423
+ур 2418
+үүл 2406
+оро 2401
+_хо 2378
+Ó©Ñ… 2370
+лч 2367
+_Ñ…Ó© 2359
+од 2253
+Ó©Ñ 2252
+ра 2246
+_то 2227
+_н 2215
+Ð»Ñ 2177
+Ñ‚_ 2162
+лц 2141
+Ó©Ñ€Ó© 2123
+онг 2044
+Ó©Ó©Ñ€ 2037
+ийн_ 2022
+_ор 2015
+Ñа 2014
+зо 1998
+хг 1989
+_Ó©Ñ€ 1989
+ин 1981
+нх 1972
+ов 1970
+Ñ 1960
+гд 1959
+дог 1959
+хгү 1951
+хгүй 1948
+цо 1931
+ма 1925
+үр 1920
+лт 1919
+өнг 1896
+нд 1895
+дөг 1882
+ил 1878
+оол 1876
+оор 1871
+уд 1867
+ийг_ 1864
+ба 1859
+на 1852
+_у 1850
+мо 1818
+зө 1816
+өөл 1813
+хи 1810
+өд 1804
+мө 1794
+хд 1790
+өв 1772
+_Ò¯ 1767
+Ñн 1758
+в_ 1757
+Ð´Ñ 1757
+Ñон 1745
+_зо 1734
+_Ñ‚Ó© 1728
+цө 1726
+Ñ‚Ñй 1722
+но 1719
+ам 1716
+ÑÑ€_ 1707
+Ð¾Ð¾Ñ 1697
+ц_ 1696
+ав 1681
+дч 1680
+дчи 1668
+_зө 1665
+Ó©Ó©Ñ 1645
+нгү 1634
+Ñөн 1631
+дчих 1631
+Ñ‚Ñй_ 1623
+Ñ…Ó©Ó© 1610
+йл 1600
+ÑÑÑ€ 1583
+рл 1572
+гу 1569
+Ñ…Ñ 1565
+рчи 1555
+ÑÑ 1543
+ши 1514
+жи 1503
+ь 1492
+гÑÑ_ 1481
+_ха 1478
+гийн 1477
+ихд 1476
+цго 1474
+_цо 1470
+оог 1470
+цгоо 1466
+өөг 1465
+чихд 1464
+хоо 1463
+ри 1457
+ан_ 1447
+ай_ 1440
+ой 1438
+ни 1428
+ÑÑÑ€_ 1427
+лий 1417
+нгүй 1410
+үд 1402
+цгө 1402
+цгөө 1400
+_цө 1396
+ын 1383
+_ба 1378
+гг 1372
+оч 1372
+ггү 1368
+ггүй 1368
+өгч 1353
+ом 1350
+иг 1335
+огч 1320
+нө 1313
+ууд 1312
+Ñ…Ñ 1300
+ÑÑ 1298
+йд 1296
+ар_ 1289
+Ñ€Ñ 1288
+Ð°Ñ 1287
+ад 1282
+ч_ 1280
+ж_ 1278
+аг_ 1271
+өч 1269
+_Ñ 1268
+Ñн 1268
+ху 1267
+роо 1266
+ыг 1260
+ыг_ 1253
+лгү 1251
+гт 1249
+ÑÑ_ 1246
+_ол 1238
+чд 1230
+Ñ€Ó©Ó© 1223
+бу 1220
+йнх 1203
+ÑÑÑ 1203
+ын_ 1201
+бо 1196
+ид 1194
+ийнх 1188
+гчи 1184
+ороо 1181
+Ñ€Ñ 1179
+ат 1174
+оÑо 1170
+Ð¸Ñ…Ñ 1168
+дги 1166
+дгий 1166
+Ñ‡Ð¸Ñ…Ñ 1158
+ах 1155
+ÑÑÑ_ 1154
+ту 1153
+уг 1152
+лгүй 1150
+_бу 1139
+Ñ‚Ò¯ 1129
+_өл 1129
+өрч 1124
+цоо 1121
+лз 1119
+нго 1119
+гц 1110
+Ó©Ñ€Ó©Ó© 1100
+йгÑÑ_ 1098
+Ð½Ñ 1096
+ор_ 1094
+онго 1090
+за 1089
+оло 1087
+от 1085
+лчи 1079
+ал_ 1066
+өлг 1066
+гÑÑÑ€ 1063
+олг 1062
+Ñ…Ò¯ 1060
+цөө 1059
+тл 1054
+_ху 1054
+_мө 1052
+чхө 1050
+он_ 1047
+үүд 1046
+гÑÑÑ€_ 1046
+лу 1040
+аар 1036
+п 1026
+рх 1025
+рчих 1024
+Ð»Ñ 1021
+йгÑÑÑ€ 1020
+гчд 1019
+Ñв 1016
+вл 1014
+лчх 1014
+орд 1013
+орч 1013
+ихг 1012
+ихгү 1012
+ихгүй 1012
+йт 1010
+нгө 1008
+йд_ 1005
+өнгө 1004
diff --git a/libtextcat/data/new_fingerprints/lm/nepali.lm b/libtextcat/data/new_fingerprints/lm/nepali.lm
new file mode 100644
index 000000000000..5d3507646f93
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/nepali.lm
@@ -0,0 +1,400 @@
+_ 7044
+f 2698
+] 1196
+g 933
+s 815
+l 781
+/ 698
+; 662
+k 645
+d 635
+]_ 630
+f_ 611
+sf 549
+f] 541
+n 526
+t 460
+o 412
+j 399
+f]_ 374
+L 365
+_; 364
+x 356
+_k 353
+{ 337
+b 323
+u 321
+' 303
+df 299
+sf] 290
+_l 283
+sf]_ 265
+g] 245
+G 232
+e 231
+| 227
+a 222
+/_ 222
+L_ 211
+_g 210
+c 207
+df_ 207
+{_ 207
+_c 201
+g_ 201
+kf 184
+f/ 177
+_u 171
+/f 168
+P 163
+_/ 162
+k| 161
+_s 161
+} 159
++ 158
+h 151
+fn 150
+n] 145
+of 141
+tf 140
+: 136
+p 136
+t_ 134
+. 133
+_k| 133
+._ 133
+_e 131
+_d 131
+_f 129
+_._ 127
+_. 127
+n]_ 123
+;f 119
+lj 118
+O 118
+? 118
+q 118
+nf 118
+y 116
+_a 113
+m 111
+Ps 110
+r 110
+fg 106
+w 105
+sf_ 103
+lg 102
+g]_ 100
+gf 94
+_g] 91
+Psf 90
+x? 89
+fd 88
+fO 88
+_lj 85
+]s 85
+z 85
+s_ 84
+_p 82
+_sf 80
+D 79
+kl 78
+_;f 78
+f/_ 77
+cf 77
+}_ 76
+Psf] 76
+jf 76
+, 75
+fl 75
+\ 75
+_x 74
+kfn 74
+f{ 74
+_/f 74
+]k 73
+of] 73
+,_ 72
+v 72
+lt 72
+_cf 72
+_b 71
+O{ 70
+i 69
+xf 68
+]kf 68
+_/_ 68
+Psf]_ 68
+_h 67
+g]kfn 67
+g]k 67
+d_ 67
+g]kf 67
+O{_ 67
+]kfn 67
+_g]k 66
+g' 66
+_g]kf 66
+o_ 64
+" 64
+;b 63
+Gq 63
+f+ 63
+n_ 63
+fk 62
+sf/ 62
+/L 62
+_kl 62
+I 61
+T 61
+/s 61
+If 61
+dG 60
+_j 60
+]sf 60
+;_ 59
+u_ 59
+yf 59
+dGq 58
+fO{ 58
+af 57
+eP 57
+fs 57
+fO{_ 57
+fj 56
+f; 56
+qL 56
+l/ 55
+\_ 55
+;D 54
+dGqL 54
+GqL 54
+;+ 53
+g\ 53
+/sf 52
+nfO 51
+_kf 51
+ug 51
+pg 51
+:t 51
+fp 50
+_ug 50
+u/ 50
+lg_ 49
+ePs 49
+fdf 48
+cl 48
+nfO{_ 48
+nfO{ 48
+;/ 48
+_cl 47
+F 47
+'/ 47
+g\_ 47
+_r 47
+fo 47
+_eP 47
+]{ 46
+Z 46
+/L_ 46
+_f_ 45
+ePsf 45
+gd 45
+_;D 45
+if 45
+Gb 44
+lb 44
+Q 44
+_n 44
+fsf 44
+]sf] 43
++; 43
+nL 43
+lx 43
+_t 43
+_: 42
++;b 42
+_o 42
+fdf_ 42
+bf 42
+hf 42
+_lg 42
+ePsf] 41
+j_ 41
+ln 41
+ef 41
+/] 41
+_df 41
+To 40
+klg_ 40
+_klg_ 40
+]sf]_ 40
+klg 40
+_klg 40
+_u/ 40
+f{_ 40
+_ePs 40
+'g 40
+Gg 39
+:y 39
+_g\ 39
+f} 39
+kf_ 39
+:tf 38
+]{_ 38
+fpg 38
+b' 38
+hg 38
+g]{ 38
+jZ 38
+_;+ 37
+gs 37
+wf 37
+o{ 37
+GqL_ 37
+l_ 37
+qL_ 37
+dGqL_ 37
+gf_ 37
+]; 37
+_ePsf 36
+x' 36
+?_ 36
+! 36
+fx 35
+u| 34
+;d 34
+_lb 34
+fn_ 34
+_P 34
+fg_ 34
+= 34
+bn 34
+Zj 34
+o; 34
+g]{_ 34
+_g\_ 34
+kIf 33
+dl 33
+kI 33
+ljZj 33
+jZj 33
+rf 33
++u 33
+]d 33
+ljZ 33
+nL_ 32
+f+; 32
+fnL 32
+gL 32
+;/sf 32
+tf_ 32
+wfg 32
+_;/ 32
+Qm 32
+fb 32
+f;_ 32
+ld 32
+;/sf/ 32
+/sf/ 32
+_x' 32
+;/s 32
+;f+ 32
+_;f+ 32
+fu 32
+x?_ 32
+;f+; 31
+_k|:t 31
+s' 31
+_kI 31
+_k|: 31
+of]_ 31
+_kIf 31
+_;f+; 31
+]l 31
+k|: 31
+bg 31
+_;/s 31
+|: 31
+f+;b 31
+_;/sf 31
+|:t 31
+|] 31
+k|:t 31
+m_ 31
+;f+;b 31
+Jo 30
+k|wf 30
+|w 30
+|wf 30
+k|w 30
+km 30
+J 30
+k|wfg 30
+|wfg 30
+]t 30
+@ 30
+;f] 30
+b_ 29
+> 29
+kfn_ 29
+fn] 29
+gsf 29
+sf+ 29
+Zjf 29
+]kfn_ 29
+jif 29
+ji 29
+jf; 29
+_k|w 29
+fgd 29
+_k|wf 29
+Zjf; 29
+ljZjf 28
+jZjf; 28
+]n 28
+_z 28
+_s' 28
+_dG 28
+|]; 28
+_;d 28
+jZjf 28
+u|] 27
+gdGq 27
+gdG 27
+?n 27
+x'g 27
+|wfgd 27
+wfgd 27
+gdGqL 27
++u|] 27
+f+u 27
+fgdG 27
+u|]; 27
++u|]; 27
+wfgdG 27
+dfg 27
+a} 27
+_dGq 27
+f+u| 27
+fgdGq 27
+sf+u| 27
+_sf+u 27
+f+u|] 27
++u| 27
+sf+u 27
+_sf+ 27
+x?n 27
+S 26
+_To 26
+]kfnL 26
+_! 26
+_T 26
+ul 26
+/f] 26
+fsf] 26
+af_ 26
+;s 26
+kfnL 26
+|:tf 26
+k|:tf 26
diff --git a/libtextcat/data/new_fingerprints/lm/norwegian.lm b/libtextcat/data/new_fingerprints/lm/norwegian.lm
new file mode 100644
index 000000000000..f2c3cec3aae3
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/norwegian.lm
@@ -0,0 +1,400 @@
+_ 22970
+e 6833
+n 4206
+r 3516
+t 3112
+a 2587
+s 2440
+i 2112
+l 1901
+o 1900
+n_ 1875
+r_ 1761
+k 1713
+g 1630
+en 1615
+m 1508
+e_ 1450
+d 1444
+er 1436
+h 1306
+t_ 1300
+_h 1180
+_s 1148
+er_ 1105
+v 982
+en_ 976
+an 919
+. 901
+._ 791
+_. 781
+et 770
+g_ 762
+_._ 726
+Ã¥ 725
+u 719
+f 709
+p 702
+ha 682
+_ha 672
+de 657
+te 651
+_e 621
+et_ 614
+re 581
+ne 565
+_o 554
+an_ 544
+ke 534
+_, 522
+,_ 522
+, 522
+_,_ 522
+_f 519
+_m 515
+or 503
+_d 483
+_i 480
+Ã¥_ 479
+se 476
+m_ 469
+nn 454
+b 449
+me 441
+ø 434
+_a 413
+st 404
+_t 398
+og 380
+_v 377
+_og 366
+ar 364
+el 364
+le 361
+i_ 356
+om 353
+og_ 351
+_og_ 351
+li 350
+_k 346
+_de 339
+ge 339
+han 337
+y 333
+_han 332
+ve 330
+kk 323
+in 311
+_b 307
+fo 301
+j 301
+il 298
+_H 291
+H 291
+han_ 288
+_han_ 288
+for 287
+ik 281
+l_ 278
+kke 277
+tt 276
+ti 270
+ne_ 270
+d_ 269
+ed 269
+om_ 268
+nne 266
+_me 264
+ng 257
+_er 257
+_fo 256
+eg 256
+_se 256
+_g 256
+un 255
+ig 255
+sk 253
+_er_ 252
+_p 252
+_for 250
+ke_ 249
+_n 238
+_l 233
+al 232
+ør 222
+s_ 221
+ar_ 215
+at 214
+_en 211
+he 211
+pe 209
+_i_ 208
+am 200
+es 200
+si 200
+enn 197
+det 195
+or_ 193
+vi 190
+ns 189
+ikk 188
+det_ 185
+so 185
+un_ 183
+il_ 181
+nd 181
+te_ 181
+"_ 180
+" 180
+_"_ 180
+_" 180
+em 179
+_ti 176
+kke_ 176
+lig 174
+ten 174
+Ha 173
+_Ha 173
+re_ 172
+ikke 168
+je 165
+Han 165
+ter 165
+_Han 165
+eg_ 164
+på 164
+_på 163
+_si 163
+_Ã¥ 163
+_Han_ 162
+Han_ 162
+på_ 162
+_på_ 161
+til 160
+som 160
+_so 159
+den 159
+_det 157
+ed_ 155
+ll 155
+_ik 155
+rt 155
+som_ 153
+ra 152
+a_ 152
+har 152
+nt 152
+de_ 152
+tr 151
+v_ 151
+_har 151
+ka 151
+ig_ 150
+_som 150
+for_ 150
+_som_ 150
+_en_ 149
+hu 149
+_ikk 148
+_ham 148
+ham 148
+ste 148
+_det_ 148
+_ikke 148
+enne 148
+ikke_ 148
+har_ 147
+nge 147
+D 147
+_har_ 147
+_D 147
+am_ 147
+ere 147
+ham_ 146
+_ham_ 146
+it 145
+_he 144
+_til 144
+av 143
+va 140
+men 140
+Ã¥r 140
+_ve 140
+_hu 139
+ta 139
+pen 137
+sp 137
+_st 135
+tte 135
+la 135
+_E 133
+E 133
+den_ 130
+is 130
+til_ 128
+_r 128
+tt_ 128
+Ã¥r_ 127
+k_ 124
+_Ã¥_ 124
+ri 124
+_til_ 124
+at_ 123
+ene 123
+seg 123
+_av 123
+med 122
+_vi 122
+_seg 122
+seg_ 121
+_seg_ 121
+_for_ 120
+nne_ 120
+ut 120
+_u 119
+mm 119
+mme 119
+De 118
+_De 118
+_at 118
+_hun 117
+hun 117
+ko 117
+be 116
+_at_ 115
+ter_ 115
+pen_ 114
+ker 113
+hun_ 113
+_hun_ 113
+on 111
+lig_ 111
+.. 110
+hen 107
+_med 107
+rs 106
+ser 106
+med_ 105
+_men 104
+_hen 104
+_sk 104
+_med_ 104
+ak 103
+ans 103
+ker_ 102
+av_ 101
+_ka 101
+no 100
+ver 100
+ler 99
+J 99
+spe 99
+ten_ 99
+_J 99
+ene_ 98
+ld 98
+hv 98
+_av_ 98
+ger 97
+ni 96
+gen 96
+ie 95
+ser_ 94
+_et 94
+spen 94
+_hv 94
+men_ 93
+Espe 92
+Es 92
+_Esp 92
+_Es 92
+_Espe 92
+Esp 92
+_al 92
+Espen 92
+lle 89
+rem 89
+id 89
+fø 89
+ei 88
+inn 88
+rd 88
+enne_ 88
+_henn 87
+henne 87
+henn 87
+kt 86
+spen_ 86
+_om 86
+ler_ 86
+da 86
+ett 86
+itt 86
+bl 85
+to 85
+_Je 84
+ger_ 84
+Je 84
+æ 84
+ma 83
+ing 83
+ær 83
+ns_ 83
+eli 82
+ang 82
+_be 82
+så 82
+_den 82
+pp 81
+rk 81
+dr 81
+oe 81
+ss 81
+_fø 80
+ek 80
+le_ 79
+_no 79
+kj 78
+elig 78
+nes 78
+nn_ 77
+nk 77
+fr 77
+sl 77
+my 77
+kan 77
+så_ 76
+as 76
+_om_ 76
+_kan 75
+_ko 75
+_bl 73
+Hu 73
+nen 73
+_Hu 73
+eng 73
+gj 73
+rt_ 72
+ge_ 72
+ba 72
+lv 71
+rer 71
+nde 71
+ls 70
+lo 70
+ga 70
+_noe 70
+ro 70
+_den_ 70
+_ut 70
+noe 70
+Hun 69
+Hun_ 69
+_in 69
+_Hun 69
+_Hun_ 69
+ren 68
+øre 68
+ør_ 68
+sen 68
+sa 67
diff --git a/libtextcat/data/new_fingerprints/lm/persian.lm b/libtextcat/data/new_fingerprints/lm/persian.lm
new file mode 100644
index 000000000000..858f468ae54a
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/persian.lm
@@ -0,0 +1,400 @@
+_ 12318
+? 5938
+¤ 2613
+?_ 1815
+¢ 1745
+þ 1569
+ø 1546
+õ 1422
+- 1333
+÷ 1155
+ù 991
+¨ 841
+_ù 778
+ö 761
+î 693
+ü 664
+_ö 663
+ª 660
+¤? 631
+õ_ 624
+_? 601
+?¤ 598
+Â¥ 589
+_¤ 565
+?? 564
+ý 513
+¢_ 486
+_ü 473
+_ý 469
+þ? 459
+û 428
+ø_ 427
+_¢ 398
+î_ 392
+ó 390
+ê 384
+ö? 379
+÷? 361
+_ø 337
+_- 334
+> 327
+ú 324
+_ö? 319
+ù? 313
+_ø_ 308
+ð 305
+¤¢ 303
+¤_ 297
+?õ 290
+¨? 285
+¢÷ 272
+-_ 271
+S 266
+_ù? 265
+ô 259
+-¨ 255
+¡ 255
+??_ 255
+ý? 254
+Â¥? 250
+Y 246
+¤¢_ 245
+ì 243
+_ý? 238
+?¢ 234
+_Â¥ 229
+ò 229
+_S 228
+¤ø 216
+ø¤ 215
+?þ 215
+?¨ 214
+_¤¢ 214
+ñ 208
+þ?_ 207
+¢¤ 203
+?- 202
+_ô 199
+ù?_ 199
+öþ 195
+¨_ 190
+¨?_ 190
+_?? 189
+õ? 188
+. 188
+_¤¢_ 185
+_ñ 183
+?ø 181
+_öþ 180
+¢? 177
+* 176
+÷_ 176
+_ù?_ 174
+?û 173
+¤þ 172
+ª_ 170
+þõ 164
+ä 161
+¯ 158
+-¨? 156
+û_ 155
+¤?_ 155
+_Â¥? 152
+üõ 151
+ý¤ 150
+_¤? 146
+Â¥?_ 144
+þ¤ 143
+¢ø 141
+?¤_ 140
+*_ 138
+ü÷ 137
+?÷ 136
+¬ 133
+ùî 132
+_ý¤ 131
+-ª 131
+ø? 130
+?ú 129
+_. 129
+þó 129
+¡_ 128
+ù¢ 124
+¤õ 121
+¤- 120
+ùî_ 120
+_ùî 120
+_??_ 119
+ª? 119
+-¨?_ 119
+ã 118
+-? 118
+>_ 116
+öþ? 114
+ê_ 111
+¢þ 109
+_Â¥?_ 109
+_ü÷ 109
+_ùî_ 108
+_öþ? 107
+ö?¤ 107
+© 106
+_¢÷ 106
+.¢ 104
+?Â¥ 103
+_?¤ 103
+¤ê 103
+ó? 103
+þ¢ 103
+ñ? 102
+ªî 100
+?î 100
+?¤? 98
+ð_ 98
+ý?û 96
+¤ú 95
+öþ?_ 95
+_© 94
+§ 94
+Y_ 94
+_ö?¤ 93
+_ý?û 93
+¥ø 92
+øª 91
+_ñ? 91
+_öþ?_ 91
+?¨_ 90
+÷?_ 90
+óþ 90
+õ?_ 90
+ü- 90
+øõ 89
+: 89
+ªî_ 88
+ü÷? 88
+öø 88
+_ù¢ 87
+-ê 86
+þ÷ 85
+_öø 83
+, 82
+þõ_ 82
+þ¨ 82
+?¢_ 80
+® 80
+?¤þ 79
+_ü÷? 79
+¢ª 77
+??? 77
+øþ 76
+_-? 75
+֔ 75
+?ª 75
+?ì 75
+¤î 74
+_¤ø 73
+ä_ 73
+þóþ 73
+û? 73
+óþõ 72
+þóþõ 71
+ù¤ 71
+?> 71
+ê? 70
+_?¤_ 70
+_.¢ 70
+ø¡ 69
+à 69
+_§ 68
+¥ø¤ 68
+þóþõ_ 67
+_ü- 67
+¤øª 67
+óþõ_ 67
+õû 67
+¤ê_ 67
+¨õ 67
+¤õ_ 66
+?-_ 65
+ù÷ 65
+ý?¤ 65
+öøþ 64
+¤þ? 64
+¬_ 64
+?ð 63
+?-¨ 63
+ø- 63
+_ý?¤ 62
+ø?_ 62
+ø¤_ 62
+_-¨ 61
+õþ 61
+øþóþõ 60
+øþóþ 60
+øþó 60
+?ù 60
+øªî 60
+_¢ø 60
+¤øªî 60
+_öøþ 60
+-ª? 60
+ì_ 60
+¨¤ 60
+ý¤? 60
+÷þ 59
+öøþóþ 59
+¯? 59
+_¢? 59
+öøþó 59
+¢ª_ 59
+¤?? 58
+¤?¥ 58
+_üõ 58
+.- 58
+?¡ 58
+ø÷ 57
+í 57
+¢¤î 57
+?¤?_ 57
+þ?¤ 57
+ù- 57
+üõ_ 56
+ñ?¨ 56
+øõ_ 56
+?¤þ? 56
+ú? 56
+î¤ 56
+¤?¢ 56
+õû_ 55
+ô? 55
+ª?¢ 55
+_öøþó 55
+-ª?¢ 55
+þ_ 55
+ö?¤þ 55
+¤î_ 55
+S¤ 55
+üþ 54
+_S¤ 53
+øªî_ 53
+_ù¤ 53
+ò¢ 53
+¤ò 53
+¤øªî_ 53
+ø¡_ 52
+¢÷? 52
+û¤ 52
+üþ? 52
+õ> 52
+-õ 52
+¢¤? 52
+üð 52
+ì? 52
+_í 52
+î? 52
+üó 52
+??¤ 52
+ð÷ 51
+¤ò¢ 51
+-þ 51
+_ý¤? 51
+¡? 50
+_¢¤ 50
+¢¤î_ 50
+.-¨ 50
+ö?õ 50
+ë 50
+_ô? 50
+ø¢ 50
+_ñ?¨ 49
+.¢÷ 49
+÷¤ 49
+ý?¤? 49
+ú- 48
+_üð 48
+_ö?¤þ 48
+¤þ?_ 48
+ò¢_ 48
+ûù 48
+ú?_ 48
+¤ò¢_ 47
+ê¤ 47
+â 47
+.-¨? 47
+_¢þ 47
+>÷ 47
+úª 47
+÷?õ 47
+£ 47
+?¤þ?_ 47
+?ûù 47
+¤¯ 46
+_.- 46
+_ý?¤? 46
+õ¤ 46
+õó 46
+-¤ 46
+_üþ 46
+¥þ 46
+¤ð 45
+ôþ 45
+_üó 45
+ö?- 45
+§þ 45
+_ë 45
+f 45
+õ- 44
+_ù- 44
+úõ 44
+_üþ? 44
+ü-ª 44
+-î 44
+ð¢ 44
+ö?¤þ? 44
+õª 44
+.-¨?_ 44
+¨õ_ 44
+¤ª 44
+_-¨? 44
+¤?î 43
+ä? 43
+é 43
+?õ_ 43
+_¤øªî 43
+þª 43
+?ê 43
+?î_ 43
+_¤øª 43
+¯_ 43
+¤¨ 43
+þ?? 42
+Y? 42
+Â¥_ 42
+÷÷ 42
+÷õ 42
+_¥ø 42
+þ¨? 42
+¢õ 42
+/ 42
+_â 42
+???_ 42
+_¥ø¤ 42
+_ù÷ 42
+s 42
+¢÷÷ 42
+ö?-¨ 41
+÷ø 41
+ù÷? 41
+?* 41
+S¤ò¢ 41
+_ö?õ 41
+ñþ 41
+_.-¨ 41
+îþ 41
+÷¢ 41
+ùõ 41
+S¤ò 41
+ªø 41
+ý?¤?_ 40
+¤úª 40
diff --git a/libtextcat/data/new_fingerprints/lm/polish.lm b/libtextcat/data/new_fingerprints/lm/polish.lm
new file mode 100644
index 000000000000..eac3b27eca6a
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/polish.lm
@@ -0,0 +1,400 @@
+_ 31480
+a 7945
+i 7766
+e 7462
+o 6838
+z 5104
+n 5077
+r 4178
+w 4139
+s 3596
+c 3580
+y 3569
+t 3381
+d 3027
+k 2819
+p 2639
+m 2494
+ie 2484
+u 2016
+l 1947
+j 1932
+ni 1930
+e_ 1746
+_p 1712
+³ 1649
+a_ 1496
+o_ 1431
+, 1368
+,_ 1366
+b 1296
+_w 1257
+g 1249
+i_ 1140
+. 1118
+_s 1045
+ze 1041
+._ 1035
+_n 997
+nie 958
+cz 956
+rz 952
+h 950
+_z 933
+ê 918
+ow 911
+ie_ 902
+y_ 899
+na 885
+ch 871
+po 864
+pr 863
+wi 851
+st 847
+¿ 835
+± 832
+an 814
+ó 800
+ra 778
+zy 766
+ia 726
+za 718
+_t 699
+wa 692
+ro 692
+_d 683
+_pr 675
+¶ 661
+w_ 658
+sz 652
+_po 649
+_o 628
+m_ 613
+li 613
+dz 611
+ki 611
+en 609
+mi 593
+ta 571
+ci 564
+ej 562
+nie_ 558
+_m 553
+_k 549
+ar 543
+go 541
+_i 540
+em 532
+od 525
+yc 520
+a³ 511
+rze 508
+do 504
+eg 503
+ko 502
+ac 482
+to 478
+_na 467
+_ni 467
+h_ 466
+ch_ 466
+æ 465
+iê 461
+_b 458
+on 458
+u_ 458
+zi 454
+ka 450
+er 448
+sk 447
+si 447
+wy 444
+te 437
+ak 434
+ê_ 431
+_j 429
+je 429
+z_ 427
+ny 422
+aw 422
+ne 420
+ów 418
+_w_ 415
+_c 414
+ego 412
+prz 412
+_r 410
+al 407
+³a 405
+" 405
+re 405
+es 401
+_nie 396
+dzi 394
+ty 389
+j_ 380
+ic 380
+_prz 379
+ad 367
+ej_ 364
+le 359
+æ_ 358
+ed 354
+ych 346
+_za 346
+_do 344
+zn 344
+go_ 344
+ani 343
+_i_ 342
+no 339
+or 337
+³o 336
+tr 334
+P 330
+os 329
+am 329
+da 328
+ec 327
+ol 325
+±_ 325
+by 322
+ego_ 321
+at 321
+¿e 319
+ym 317
+wie 314
+³_ 312
+- 310
+na_ 309
+_si 303
+W 301
+as 301
+wo 300
+pa 299
+siê 295
+nia 293
+owa 292
+o¶ 286
+_P 283
+el 282
+_siê 279
+ja 278
+rzy 276
+prze 276
+_wy 275
+iê_ 269
+in 267
+_- 267
+de 267
+kie 267
+dn 264
+ob 262
+_u 261
+ych_ 260
+-_ 258
+ez 257
+_-_ 256
+¶c 255
+ws 255
+¶ci 254
+em_ 253
+_siê_ 253
+siê_ 253
+_nie_ 253
+kt 252
+ski 252
+we 251
+_g 251
+_W 250
+t_ 249
+_prze 249
+_je 248
+aj 247
+_a 247
+¿e_ 246
+_¿ 244
+ia_ 243
+eni 241
+om 240
+la 240
+k_ 235
+mo 235
+f 234
+pi 232
+is 231
+cze 231
+_z_ 230
+ñ 228
+nt 227
+ce 224
+sta 221
+ry 220
+ma 219
+cj 219
+zie 218
+ek 216
+oc 213
+dy 212
+owi 208
+sp 208
+K 208
+tó 205
+_" 204
+ud 203
+S 202
+ier 202
+pra 202
+czn 201
+ys 200
+nia_ 199
+j± 198
+_¿e 196
+oz 194
+N 192
+zo 191
+dzie 190
+ku 190
+ów_ 189
+czy 189
+_mi 188
+_¿e_ 187
+mie 186
+do_ 186
+iej 185
+ym_ 185
+_do_ 184
+cy 184
+_pa 183
+y³ 182
+_na_ 181
+zy_ 181
+ew 180
+_l 180
+_cz 178
+_by 178
+ru 177
+to_ 175
+±c 175
+_wi 175
+ln 174
+_K 171
+ok 170
+ot 170
+raw 169
+nych 168
+nyc 168
+az 168
+ik 167
+bi 167
+i,_ 166
+i, 166
+_te 165
+tu 163
+wan 163
+et 162
+J 162
+ór 161
+cie 160
+_od 160
+ba 160
+oj 159
+o¶c 158
+i³ 158
+_ro 158
+og 158
+mu 158
+A 158
+o¶ci 158
+d_ 157
+tw 156
+io 155
+gr 155
+_J 155
+_N 155
+_to 155
+us 155
+¿_ 154
+aæ 154
+ach 154
+yd 153
+_kt 153
+_ko 152
+_st 152
+pow 151
+yt 150
+kr 149
+_S 149
+a³_ 149
+ak_ 149
+li_ 148
+T 148
+ur 148
+sa 148
+op 147
+yw 147
+ent 147
+bo 146
+_sp 145
+O 145
+a,_ 144
+a, 144
+iem 144
+któ 143
+praw 143
+wn 142
+tór 142
+i± 141
+³y 141
+zu 141
+dni 140
+im 139
+icz 139
+ró 139
+³e 138
+któr 137
+my 137
+est 136
+awi 135
+przy 134
+nych_ 134
+e,_ 132
+e, 132
+ki_ 132
+pro 131
+_któ 131
+_któr 131
+_przy 130
+_ja 129
+szy 127
+ia³ 127
+Po 127
+ania 127
+M 126
+ze_ 125
+ne_ 125
+ñs 124
+_ty 124
+sze 124
+a¿ 124
+nn 124
+pod 124
+aæ_ 122
+B 122
+za_ 122
+cza 122
+³o_ 122
+Z 122
+_to_ 121
+iu 120
+zc 119
+esz 119
+skie 118
+i. 118
+_ka 117
+so 117
+trz 117
+o¿ 116
+ieg 116
+nik 116
+ga 116
diff --git a/libtextcat/data/new_fingerprints/lm/portuguese.lm b/libtextcat/data/new_fingerprints/lm/portuguese.lm
new file mode 100644
index 000000000000..4e1a5d7698e4
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/portuguese.lm
@@ -0,0 +1,400 @@
+_ 35328
+a 10423
+e 10132
+o 8919
+s 6795
+r 6033
+i 5443
+n 4588
+d 4531
+t 4217
+m 3476
+u 3404
+o_ 3240
+a_ 3029
+e_ 2879
+c 2756
+s_ 2461
+_d 2379
+l 2307
+p 2242
+_a 1753
+de 1751
+, 1660
+,_ 1658
+_e 1454
+es 1447
+os 1412
+ra 1343
+_p 1328
+nt 1302
+_de 1248
+do 1215
+en 1176
+re 1150
+as 1123
+v 1115
+m_ 1113
+de_ 1096
+er 1082
+g 1053
+_c 1047
+da 1008
+co 986
+os_ 975
+te 974
+ar 950
+or 943
+q 938
+qu 938
+_s 908
+ta 902
+_de_ 901
+_o 858
+se 841
+ue 831
+to 799
+ad 777
+. 761
+que 752
+em 751
+an 748
+f 746
+r_ 745
+b 732
+st 718
+is 716
+al 712
+_qu 706
+_q 706
+in 701
+as_ 696
+ã 695
+do_ 685
+ent 678
+ão 677
+_n 671
+_co 660
+_a_ 654
+_m 646
+on 645
+ç 624
+ri 623
+_que 619
+ma 602
+po 581
+ia 580
+ão_ 575
+._ 573
+na 572
+me 564
+ro 554
+_t 544
+pa 533
+da_ 528
+h 523
+ue_ 515
+ca 511
+que_ 509
+nte 503
+no 499
+tr 498
+am 496
+em_ 491
+_que_ 487
+_se 485
+om 471
+io 460
+_do 459
+ti 448
+ci 445
+_da 444
+nd 442
+ei 435
+ra_ 435
+pr 427
+_r 423
+_e_ 420
+_f 420
+ss 412
+es_ 412
+el 407
+id 406
+_o_ 399
+_pa 390
+um 379
+pe 378
+_po 376
+la 374
+ir 371
+á 371
+ic 362
+di 362
+li 359
+é 359
+_re 353
+ve 353
+mo 350
+s, 349
+s,_ 349
+ou 347
+com 340
+sa 338
+si 338
+men 337
+rt 331
+_i 330
+con 330
+o, 327
+_da_ 326
+o,_ 326
+se_ 325
+_com 325
+ado 323
+to_ 322
+ai 322
+it 320
+A 319
+ec 316
+dos 316
+_em 312
+ção 310
+aç 310
+çã 310
+ara 305
+so 299
+tu 299
+res 297
+im 296
+_pr 295
+mi 293
+ua 292
+nto 291
+ment 290
+í 290
+par 288
+_do_ 287
+ce 286
+est 286
+u_ 284
+ente 284
+S 278
+l_ 278
+_u 278
+" 276
+ni 276
+z 274
+sta 273
+nc 272
+_em_ 270
+P 269
+ção_ 267
+_v 267
+at 267
+dos_ 266
+_es 262
+« 259
+_« 259
+te_ 258
+» 257
+va 255
+le 252
+ur 252
+_um 252
+vi 251
+_par 250
+a, 247
+a,_ 247
+_con 247
+ant 242
+lo 240
+ia_ 240
+gu 237
+ar_ 235
+ac 235
+e,_ 234
+e, 234
+no_ 232
+eg 232
+il 232
+ns 232
+er_ 231
+_ma 230
+por 230
+_in 228
+_l 226
+ó 225
+ont 224
+_no 223
+_P 222
+tra 220
+E 219
+ida 218
+is_ 217
+ol 216
+açã 215
+ter 215
+ação 215
+_A 211
+un 211
+- 210
+_te 210
+or_ 209
+ma_ 208
+_pe 208
+ara_ 208
+C 206
+ist 202
+para 202
+nta 201
+ais 201
+ut 198
+nte_ 198
+j 197
+dad 196
+_na 195
+am_ 195
+ade 193
+ica 191
+x 190
+al_ 189
+O 188
+des 187
+_para 187
+ada 187
+nh 186
+_se_ 186
+mp 185
+ndo 184
+R 183
+_por 181
+ação_ 181
+para_ 179
+eir 177
+ui 177
+vo 177
+ou_ 177
+ta_ 177
+M 176
+ria 175
+tos 175
+rr 174
+D 174
+io_ 174
+br 174
+_di 173
+õ 173
+õe 173
+fo 173
+I 172
+ões 172
+_C 171
+mo_ 171
+ov 170
+pro 169
+_os_ 169
+_os 169
+das 167
+iv 166
+uma 165
+gr 165
+su 164
+fi 164
+um_ 162
+na_ 162
+ga 162
+ais_ 161
+_S 161
+lh 159
+ort 159
+cia 158
+.. 157
+_est 156
+cont 156
+ig 155
+á_ 154
+ran 154
+ça 154
+om_ 153
+_en 152
+dade 152
+_as 152
+ho 152
+ntr 151
+nto_ 151
+fe 150
+N 149
+das_ 149
+uma_ 149
+ess 149
+é_ 148
+ndo_ 147
+ob 147
+»_ 147
+ul 146
+ente_ 146
+go 146
+ento 144
+ver 144
+_des 144
+gi 144
+ha 142
+cu 142
+idad 142
+av 141
+ões_ 141
+_pro 141
+ura 141
+ap 139
+_com_ 139
+_ca 139
+com_ 139
+ao 139
+ne 138
+od 138
+_" 137
+_M 137
+pre 137
+ras 136
+_me 136
+_ao 136
+_no_ 134
+oc 134
+str 133
+tes 133
+_b 133
+and 133
+_g 133
+ro_ 133
+omo 133
+_dos 132
+_fo 132
+_dos_ 132
+rn 132
+mento 131
+ito 131
+ev 131
+rio 130
+ass 130
+eu 130
+be 128
+os, 128
+os,_ 128
+sp 127
+_uma 127
+ep 126
+tad 125
+s. 125
+_uma_ 125
+_E 125
+idade 124
+_um_ 124
+nã 124
+não 124
+ct 123
+ram 123
+ado_ 123
+ela 123
+omo_ 121
+iz 121
+_an 121
diff --git a/libtextcat/data/new_fingerprints/lm/quechua.lm b/libtextcat/data/new_fingerprints/lm/quechua.lm
new file mode 100644
index 000000000000..e59992d781b3
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/quechua.lm
@@ -0,0 +1,400 @@
+_ 5766
+a 4900
+n 1941
+i 1666
+u 1384
+s 1032
+t 995
+y 939
+h 929
+k 915
+q 909
+p 882
+a_ 847
+an 821
+r 783
+m 740
+c 705
+l 695
+ta 637
+ch 613
+ay 587
+qa 557
+pa 490
+ha 486
+e 474
+ma 457
+o 441
+na 434
+ku 411
+j 409
+un 367
+w 358
+in 353
+, 345
+,_ 344
+cha 318
+ar 317
+n_ 315
+as 291
+wa 289
+ta_ 269
+ll 259
+man 255
+_k 248
+._ 243
+. 243
+nt 227
+am 224
+pi 222
+la 222
+ka 217
+ac 214
+ni 214
+at 213
+aq 213
+i_ 208
+ri 207
+qa_ 204
+una 204
+y_ 192
+aj 192
+_p 192
+is 188
+_m 181
+lla 175
+ach 174
+rq 173
+us 172
+an_ 171
+_ka 171
+ata 169
+rqa 165
+sq 163
+hu 162
+sp 161
+_w 157
+nk 157
+hay 157
+_s 156
+sqa 155
+ki 153
+kun 152
+_c 152
+al 150
+nta 149
+ap 147
+ant 146
+yk 146
+ay_ 144
+spa 141
+hi 137
+_ch 136
+_n 136
+ya 135
+' 134
+j_ 133
+uy 132
+ra 132
+a,_ 132
+a, 132
+ti 130
+_a 125
+nc 125
+kuna 122
+s_ 121
+su 121
+ak 121
+_ma 118
+ana 118
+ari 115
+_t 114
+ama 114
+chi 114
+ñ 113
+a._ 111
+a. 111
+nch 111
+iy 111
+all 110
+aw 110
+_r 110
+anta 109
+ayk 109
+na_ 109
+chay 108
+sa 104
+_wa 104
+si 103
+chu 102
+pa_ 101
+acha 101
+_cha 101
+pi_ 101
+qan 100
+_pa 99
+_q 97
+aj_ 97
+awa 97
+ank 95
+nku 95
+im 94
+q_ 92
+uk 92
+C 92
+mu 90
+tu 89
+J 89
+_ni 88
+taj 87
+_J 87
+nin 86
+_chay 86
+u_ 86
+_C 84
+wan 83
+nta_ 81
+_j 81
+mant 80
+ut 79
+in_ 79
+ik 79
+manta 79
+asq 79
+yt 78
+n, 78
+asqa 78
+n,_ 78
+pay 78
+li 77
+yn 77
+nq 76
+yta 76
+ic 76
+up 76
+_Ch 75
+yku 75
+Ch 75
+he 75
+hay_ 74
+nan 74
+ina 74
+ur 73
+er 72
+S 72
+arq 72
+or 72
+_l 71
+_u 71
+aq_ 70
+os 70
+yp 70
+anc 69
+man_ 69
+mi 69
+ich 68
+_i 68
+st 67
+_S 67
+arqa 66
+it 66
+anch 66
+ru 66
+aku 65
+pu 65
+ña 65
+alla 64
+mp 64
+sqa_ 64
+'a 64
+ayku 63
+es 63
+A 63
+ia 63
+_man 63
+_Cha 61
+Cha 61
+taj_ 60
+api 60
+_ll 60
+wi 60
+ayp 60
+aman 59
+g 58
+anku 58
+yki 57
+ima 57
+yni 57
+oj 57
+mana 57
+anta_ 57
+_su 57
+uc 56
+isp 56
+ispa 56
+uch 56
+M 56
+ir 56
+_h 55
+nqa 55
+kuy 55
+ayt 54
+_M 54
+b 54
+_y 53
+_mana 53
+: 53
+uku 53
+:_ 53
+nm 53
+au 52
+ayta 52
+io 52
+qo 51
+an,_ 51
+apa 51
+spa_ 51
+erq 51
+_wi 51
+erqa 51
+_sa 51
+an, 51
+el 50
+um 50
+ana_ 50
+han 50
+il 50
+on 49
+chay_ 49
+sta 49
+_D 49
+D 49
+iku 49
+aqa 49
+che 48
+en 48
+yta_ 48
+Ma 47
+P 47
+_lla 47
+_Je 47
+yq 47
+Je 47
+ita 47
+rqan 47
+ypi 46
+har 46
+Jes 46
+_Jes 46
+ios 46
+ayq 46
+Dio 46
+un_ 46
+kus 46
+_Dio 46
+taq 46
+_Dios 46
+_Ma 46
+_Di 46
+Dios 46
+Di 46
+d 46
+kan 45
+Chay 45
+oq 45
+_Chay 45
+_pay 45
+upa 45
+mun 45
+ata_ 44
+_tu 44
+nis 44
+re 44
+paq 44
+yo 44
+ej 44
+qay 43
+ncha 43
+ha_ 43
+_A 43
+I 43
+_kan 43
+_nis 43
+_P 43
+nman 43
+nma 43
+ataj 42
+ara 42
+ku_ 42
+nata 42
+nat 42
+i, 41
+tin 41
+qh 41
+t' 41
+orq 41
+nki 41
+_ru 41
+_ku 41
+i,_ 41
+ip 40
+ham 40
+usq 40
+_ya 40
+qank 39
+orqa 39
+ayn 39
+mana_ 39
+ray 39
+ym 39
+uma 39
+_pu 39
+par 39
+kay 39
+n. 38
+qa,_ 38
+n._ 38
+sus 38
+aypi 38
+usqa 38
+qanku 38
+ill 38
+qa, 38
+was 38
+pa, 38
+pa,_ 38
+asp 38
+qa._ 37
+_mu 37
+paj 37
+amp 37
+hin 37
+uti 37
+rin 37
+_im 37
+_ima 37
+ja 37
+_ri 37
+rqa_ 37
+taq_ 37
+qa. 37
+sh 36
+spa,_ 36
+cha_ 36
+spa, 36
+achi 36
+una_ 36
+rqank 36
+jt 36
+K 36
+amu 36
+aspa 35
+_Jesu 35
+Jesus 35
+nispa 35
+ki_ 35
+waw 35
+ko 35
+ne 35
+esus 35
+int 35
diff --git a/libtextcat/data/new_fingerprints/lm/romanian.lm b/libtextcat/data/new_fingerprints/lm/romanian.lm
new file mode 100644
index 000000000000..65b8e7554d8e
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/romanian.lm
@@ -0,0 +1,400 @@
+_ 20674
+a 6376
+e 5815
+i 5746
+t 3396
+r 3280
+n 3103
+u 2835
+s 2611
+c 2582
+e_ 2235
+l 2224
+o 2149
+a_ 1974
+d 1629
+m 1528
+p 1410
+i_ 1358
+in 1308
+_c 1167
+_s 1118
+_d 999
+re 905
+ar 898
+, 791
+,_ 786
+_p 785
+de 771
+_a 754
+te 687
+_i 667
+at 654
+ti 645
+ca 639
+n_ 630
+ta 617
+si 614
+_de 609
+f 606
+st 583
+ri 581
+u_ 567
+nt 553
+. 542
+ra 540
+_m 534
+g 528
+v 516
+ul 516
+de_ 513
+_in 503
+b 492
+_de_ 474
+._ 472
+le 459
+l_ 444
+un 443
+_si 440
+es 437
+tr 426
+ea 420
+t_ 412
+ce 412
+ma 407
+cu 402
+er 398
+_ca 397
+si_ 388
+_f 387
+_l 383
+z 382
+la 381
+ne 370
+sa 364
+as 360
+_e 357
+in_ 356
+an 352
+it 351
+te_ 346
+or 345
+el 345
+ci 339
+_si_ 333
+_n 330
+are 324
+pe 319
+re_ 317
+al 310
+_t 309
+se 304
+ic 295
+ie 290
+_u 290
+ul_ 290
+ni 289
+int 285
+_o 280
+en 279
+ta_ 279
+ur 261
+pa 256
+co 255
+_pe 254
+ia 252
+mi 251
+pr 249
+_ma 249
+oa 249
+me 246
+lu 246
+li 241
+im 238
+_in_ 237
+da 237
+na 237
+_sa 235
+ac 234
+- 234
+ii 232
+est 231
+r_ 231
+h 230
+_cu 230
+le_ 229
+ai 229
+ca_ 227
+il 226
+ru 223
+sc 223
+_v 221
+nu 220
+tu 220
+_un 220
+nd 220
+di 219
+are_ 216
+to 215
+am 214
+on 213
+o_ 208
+is 208
+sa_ 203
+la_ 200
+_b 200
+ste 195
+et 194
+ec 191
+_r 186
+car 185
+ui 180
+un_ 179
+lo 178
+cu_ 177
+ei 176
+e, 175
+e,_ 175
+pe_ 171
+m_ 167
+_la 166
+a, 165
+a,_ 164
+_ce 164
+rt 163
+_co 163
+ent 162
+ro 162
+ele 162
+_pe_ 160
+po 160
+ea_ 159
+" 158
+ntr 158
+_cu_ 158
+_pr 157
+ut 157
+nc 156
+ata 155
+care 154
+um 153
+au 151
+va 151
+_o_ 150
+_car 150
+ii_ 145
+ind 145
+_un_ 144
+os 144
+ad 141
+_la_ 140
+I 140
+este 138
+ste_ 138
+care_ 138
+ir 137
+ga 136
+ap 136
+ol 136
+ra_ 136
+_di 134
+D 134
+_care 133
+se_ 133
+om 133
+ara 133
+ati 133
+fi 133
+_sa_ 131
+zi 130
+vi 130
+_ca_ 129
+_se 128
+_nu 128
+ai_ 127
+ch 127
+pi 124
+ve 123
+fa 122
+ot 121
+_a_ 120
+este_ 120
+du 119
+ine 119
+s_ 118
+fo 118
+_ci 118
+ui_ 118
+ba 118
+i, 117
+i,_ 116
+ne_ 115
+us 115
+_g 115
+a. 115
+fe 114
+A 114
+pu 114
+ce_ 113
+ar_ 113
+_pa 113
+oc 112
+sta 112
+lui 112
+ns 112
+em 112
+' 112
+oar 112
+din 111
+iu 111
+_int 111
+ate 111
+mu 111
+hi 110
+ele_ 110
+mp 109
+_D 109
+S 109
+sti 108
+bi 108
+ata_ 107
+ti_ 107
+tra 107
+C 107
+c_ 106
+tre 106
+_al 105
+rea 105
+mai 105
+j 104
+a._ 104
+gi 104
+e. 103
+d_ 103
+_fa 103
+E 102
+mo 102
+at_ 101
+_e_ 101
+nte 101
+lt 101
+sp 101
+za 100
+mai_ 100
+su 99
+na_ 98
+tat 97
+sin 97
+ez 96
+tru 96
+e._ 96
+ie_ 96
+ia_ 96
+_re 96
+tul 96
+_fo 96
+ina 95
+art 95
+_C 95
+no 95
+nu_ 94
+_es 94
+_po 94
+cr 94
+inc 93
+_da 92
+_mai 92
+lui_ 92
+_din 92
+_est 92
+pre 91
+_mai_ 91
+io 91
+chi 91
+ge 90
+pri 90
+eu 90
+uri 90
+az 90
+_nu_ 89
+_me 89
+ct 89
+au_ 88
+esc 88
+ev 88
+ei_ 88
+min 87
+ace 87
+op 86
+ng 86
+ici 86
+_lu 85
+ari 85
+_mi 84
+ita 84
+_S 84
+_tr 84
+ere 83
+or_ 83
+ast 83
+ist 83
+nt_ 83
+_se_ 82
+ou 82
+tin 82
+intr 82
+con 82
+do 81
+_fi 81
+str 81
+am_ 80
+rat 80
+ru_ 80
+ri_ 80
+par 80
+oi 80
+uc 79
+ze 79
+pl 79
+res 78
+_ac 77
+ulu 77
+din_ 76
+va_ 76
+ada 76
+ului 75
+_con 75
+id 75
+inte 74
+ile 73
+cit 73
+_din_ 73
+lor 73
+_" 72
+ig 72
+rin 72
+da_ 72
+_st 72
+-_ 71
+_- 71
+it_ 71
+ani 71
+nd_ 71
+ci_ 70
+ag 70
+eri 70
+i. 70
+tru_ 70
+_ne 70
+rm 70
+P 69
+_este 69
+nta 69
+bu 69
+une 69
+ma_ 69
+nti 69
+imp 68
+_-_ 68
+iv 68
+ind_ 68
diff --git a/libtextcat/data/new_fingerprints/lm/romansh.lm b/libtextcat/data/new_fingerprints/lm/romansh.lm
new file mode 100644
index 000000000000..e65969ca34fa
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/romansh.lm
@@ -0,0 +1,400 @@
+_ 10888
+a 3490
+e 2268
+i 2196
+s 2169
+n 1961
+t 1555
+r 1510
+l 1281
+u 1249
+a_ 1155
+c 1060
+d 954
+o 815
+s_ 744
+g 726
+m 686
+h 620
+p 617
+n_ 559
+v 493
+_s 480
+er 476
+ch 469
+_d 457
+in 455
+da 418
+_c 404
+r_ 388
+_e 377
+. 358
+_p 340
+as 333
+l_ 328
+, 327
+._ 320
+_i 320
+,_ 315
+la 313
+en 312
+sc 310
+an 301
+ta 300
+_da 285
+f 282
+_t 268
+_a 262
+nt 259
+_m 252
+un 248
+ra 247
+i_ 247
+na 245
+ma 245
+ia 241
+ar 234
+sch 228
+b 228
+da_ 217
+es 214
+ai 213
+st 212
+' 209
+e_ 208
+as_ 208
+er_ 200
+t_ 199
+re 199
+_l 195
+al 193
+_n 193
+el 192
+tg 192
+te 188
+z 187
+ha 180
+_f 180
+sa 174
+_da_ 172
+ve 169
+ei 168
+_v 165
+at 165
+ss 161
+is 161
+_ch 161
+on 160
+la_ 158
+cu 158
+ad 158
+he 156
+in_ 147
+va 147
+_in 146
+gl 145
+ns 141
+ur 140
+ü 140
+u_ 138
+ts 138
+pe 136
+li 134
+gi 133
+et 133
+de 132
+ig 132
+or 130
+ti 129
+il 127
+d_ 126
+che 123
+ut 122
+us 122
+cha 121
+di 120
+ia_ 120
+_b 118
+_la 117
+na_ 116
+ain 115
+per 115
+to 115
+_cu 113
+_sc 112
+se 111
+ls 109
+- 108
+iu 108
+ca 107
+si 104
+ir 102
+rt 102
+ie 102
+_g 102
+un_ 102
+nd 101
+av 101
+ni 100
+q 99
+au 97
+ls_ 97
+ll 96
+qu 96
+_pe 96
+le 96
+rs 95
+pa 95
+ri 95
+_ma 94
+_per 93
+am 93
+eg 92
+_r 92
+gn 92
+me 92
+pi 91
+an_ 91
+en_ 91
+ga 91
+ent 90
+hi 90
+_e_ 90
+it 89
+ta_ 88
+ter 87
+ns_ 86
+iv 86
+igl 86
+a. 85
+em 85
+I 85
+chi 84
+_en 84
+int 84
+ue 83
+su 82
+tt 82
+a, 82
+a,_ 82
+o_ 82
+_ve 82
+a._ 82
+_q 81
+_qu 80
+ge 80
+" 80
+_la_ 80
+ar_ 80
+vi 79
+gl_ 79
+tu 78
+ng 78
+ro 76
+mi 76
+sta 75
+ed 75
+lla 74
+ei_ 73
+_o 73
+ic 73
+el_ 73
+_il 73
+_in_ 72
+g_ 72
+pr 71
+nu 70
+ina 70
+_h 69
+scha 68
+mai 68
+pl 68
+il_ 68
+os 68
+ha_ 68
+be 67
+uo 67
+cun 67
+ra_ 67
+_pa 65
+ts_ 64
+s. 64
+co 64
+_u 64
+fi 64
+_I 63
+sa_ 63
+s._ 63
+_re 62
+ün 62
+_nu 62
+? 62
+S 62
+_se 61
+no 61
+nt_ 61
+E 61
+tr 61
+mp 60
+igl_ 60
+_su 60
+_st 60
+ess 60
+im 60
+zi 59
+?_ 59
+nc 59
+_E 58
+_- 58
+_cun 58
+_te 58
+_sa 58
+ant 58
+main 58
+eu 57
+ssa 57
+-_ 57
+iun 57
+_-_ 57
+aint 56
+op 56
+al_ 56
+dal 56
+j 56
+ama 55
+_tg 55
+ua 55
+M 55
+per_ 55
+tsc 54
+nta 54
+tsch 54
+um 54
+fa 54
+za 54
+_di 54
+pia 54
+_per_ 54
+T 54
+_M 53
+ne 53
+era 53
+A 53
+que 53
+_dal 53
+cr 53
+_de 53
+lla_ 53
+_che 52
+h' 52
+_me 51
+ot 51
+_pr 51
+_pl 51
+_sch 51
+ch' 50
+_S 50
+opi 50
+ou 50
+tta 50
+mo 50
+ada 50
+ba 50
+_an 49
+top 49
+id 49
+è 49
+he_ 49
+C 49
+va_ 49
+uto 49
+ins 48
+topi 48
+L 48
+ir_ 48
+ist 48
+c_ 48
+_il_ 48
+P 48
+ss_ 48
+ag 47
+_no 47
+res 47
+las 47
+_vi 46
+s, 46
+schi 46
+_en_ 46
+tg_ 46
+s,_ 46
+_si 46
+_que 45
+_T 45
+az 45
+'i 45
+cun_ 44
+_fa 44
+_mi 44
+utop 44
+utopi 44
+sche 44
+_C 44
+ur_ 44
+tge 44
+po 44
+es_ 44
+x 44
+nz 44
+_L 44
+_cun_ 44
+man 44
+_ch' 43
+_fi 43
+pu 43
+ell 43
+opia 43
+_igl 43
+_ig 43
+sp 43
+topia 43
+ava 42
+egn 42
+che_ 42
+on_ 42
+ci 42
+_P 42
+ev 42
+ond 41
+_" 41
+à 41
+us_ 41
+_ha 41
+D 40
+_co 40
+etg 40
+'e 40
+las_ 40
+est 40
+ura 40
+uel 39
+ed_ 39
+vo 39
+gia 39
+bu 39
+mu 39
+nn 39
+gli 39
+_A 39
+lu 39
+cha_ 39
+ul 38
+mal 38
+_ün 38
+ina_ 38
+_bu 38
+_ca 38
+_ü 38
+uc 38
+nts 38
+tra 38
+_tu 37
diff --git a/libtextcat/data/new_fingerprints/lm/russian.lm b/libtextcat/data/new_fingerprints/lm/russian.lm
new file mode 100644
index 000000000000..bddb68514275
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/russian.lm
@@ -0,0 +1,400 @@
+_ 76249
+о 19732
+е 16714
+а 14389
+и 13942
+Ñ‚ 13160
+н 12444
+Ñ 9867
+Ñ€ 8461
+в 7895
+л 7330
+к 6498
+м 5935
+. 5725
+у 5287
+д 5019
+п 4877
+Ñ 4083
+, 3899
+,_ 3878
+Ñ‹ 3656
+ь 3376
+и_ 3167
+_п 3144
+е_ 3135
+о_ 3098
+- 3019
+з 2983
+_в 2952
+._ 2930
+_Ñ 2919
+ч 2887
+г 2876
+б 2797
+ÑÑ‚ 2672
+_н 2631
+то 2585
+.. 2407
+_и 2316
+но 2300
+-_ 2294
+й 2281
+а_ 2249
+на 2057
+Ñ_ 2029
+ов 1981
+ни 1950
+_Ñ‚ 1944
+Ñ… 1874
+ен 1856
+_о 1774
+... 1744
+ра 1709
+не 1685
+по 1636
+_- 1625
+ко 1616
+те 1595
+ро 1584
+_к 1558
+_-_ 1531
+й_ 1521
+ет 1518
+ж 1509
+_и_ 1454
+та 1433
+ан 1419
+ер 1396
+от 1389
+ь_ 1381
+го 1375
+ал 1370
+_по 1364
+ре 1345
+ка 1338
+пр 1337
+ва 1329
+ти 1306
+ли 1300
+_д 1297
+_м 1290
+ÐµÑ 1284
+во 1271
+че 1256
+ор 1245
+.... 1232
+ть 1219
+Ð¾Ñ 1212
+ш 1192
+ÑŽ 1187
+в_ 1168
+..... 1167
+он 1147
+ак 1144
+ц 1109
+ог 1101
+ло 1099
+Ñ‚_ 1093
+ри 1076
+м_ 1074
+_пр 1063
+ол 1062
+ль 1045
+_не 1034
+ел 1029
+_б 1026
+ин 1000
+од 998
+ом 996
+ме 993
+Ñ‹_ 975
+ве 968
+Ñк 968
+_на 966
+де 946
+ны 943
+_Ñ€ 931
+_ч 923
+ат 913
+ем 910
+у_ 907
+за 898
+Ñ…_ 898
+ле 889
+то_ 881
+ит 878
+ой 875
+Ñе 862
+_в_ 833
+_з 816
+ки 816
+Ñ‚Ñ€ 807
+" 786
+ть_ 769
+ед 767
+не_ 762
+ÑÑ 754
+ла 748
+об 747
+мо 741
+да 730
+го_ 715
+к_ 713
+Ð¸Ñ 711
+_у 702
+ой_ 697
+ил 694
+ма 692
+нн 687
+до 662
+Ð°Ñ 660
+ам 656
+Ð¸Ñ 649
+же 646
+аз 638
+Ñо 629
+на_ 619
+_г 615
+ми 612
+_а 610
+Ñ 609
+Ð²Ñ 608
+.._ 605
+вы 604
+ого 604
+им 604
+_ко 600
+ав 597
+Ñл 594
+ие 593
+_не_ 589
+_е 587
+_те 583
+ту 583
+ич 583
+ру 575
+оÑÑ‚ 571
+щ 571
+л_ 570
+_Ñ 569
+_Ñ 559
+Ð 556
+ени 544
+из 540
+ек 536
+ова 533
+Ñ„ 525
+: 519
+Ð’ 513
+ани 511
+_Ð²Ñ 510
+ий 510
+Ð 508
+_ка 508
+! 503
+? 501
+ди 498
+ли_ 489
+П 488
+про 486
+_ра 485
+Ñи 484
+ир 484
+_ÑÑ‚ 484
+ьн 484
+льн 484
+:_ 484
+ÑÑ_ 480
+_за 477
+бо 470
+_л 469
+..._ 465
+бы 464
+их 464
+И 462
+ег 461
+тв 459
+Ð½Ð¸Ñ 458
+ÑÑ‚ 454
+чт 454
+ÑÑ‚ 445
+Ñ‡ÐµÑ 442
+_то 442
+иÑ_ 441
+ик 440
+ви 437
+ак_ 436
+Ñта 436
+ого_ 435
+_Ð’ 434
+иче 433
+ци 431
+что 431
+Ñ‹Ñ… 429
+_ÑÑ‚ 429
+С 425
+_чт 424
+_Ð 424
+ÑŽ_ 423
+пе 422
+Ð½Ñ 422
+_что 422
+Ð»Ñ 419
+вÑе 418
+ду 418
+еÑк 415
+нт 413
+как 411
+Ñто 411
+_вы 409
+ну 408
+Ñ‚Ñ 406
+н_ 406
+_П 401
+М 401
+но_ 399
+_про 398
+_. 397
+ров 396
+це 396
+кт 394
+еÑÑ‚ 394
+_" 393
+ше 393
+Ñ_ 392
+_от 392
+О 391
+_на_ 391
+ред 391
+чеÑк 390
+о- 390
+ван 388
+а, 385
+ад 384
+Ð°Ñ 384
+_Ñ_ 383
+а,_ 383
+Ñ‚Ñ‹ 383
+?_ 383
+_об 380
+_вÑе 380
+_та 378
+_как 376
+хо 375
+так 375
+аль 374
+ож 373
+Ñ‹Ñ…_ 372
+ово 372
+ив 371
+_во 369
+му 369
+_Ð 369
+ей 368
+пре 368
+зн 366
+Ð¸Ñ‡ÐµÑ 365
+пи 365
+его 362
+_Ñо 360
+ое 360
+!_ 360
+ать 360
+Ñти 358
+их_ 358
+тн 358
+мен 358
+Ñ‚ÑÑ 356
+ие_ 356
+ичеÑк 354
+бе 352
+_бы 352
+ÑÑ‚Ñ€ 349
+ку 349
+_мо 348
+ет_ 348
+Т 346
+_ме 344
+ев 344
+при 343
+чи 342
+мн 341
+ниÑ_ 339
+ар 338
+нно 337
+ован 334
+ÐºÑ 334
+ур 328
+_че 328
+оль 328
+ут 327
+что_ 324
+ом_ 323
+оп 323
+рм 322
+Ð_ 322
+_что_ 322
+Ñто 322
+оч 321
+о,_ 320
+о, 320
+Ñко 319
+кон 319
+лов 318
+ый 318
+ÑƒÑ 317
+от_ 316
+иро 315
+Ñтв 314
+и, 314
+ий_ 313
+ÑÑ 313
+и,_ 313
+том 312
+ае 312
+енн 311
+ез 311
+ной 311
+_Ñто 310
+К 309
+Ñа 309
+раз 309
+еп 309
+_до 308
+оло 308
+ÑÑ‚ÑŒ 308
+уд 307
+дел 307
+_И 305
+аб 305
+ÑÑŒ 303
+альн 302
+ок 300
+Ñки 300
+ных 300
+_при 298
+ча 297
+_ж 297
+е, 297
+е,_ 297
+_Ð_ 296
+_ни 296
+_._ 295
+Ñ‚ÑÑ_ 295
+ии 294
+зна 293
+Ñ, 292
+Ñ,_ 292
+_пре 291
+_С 290
+ной_ 290
+аÑ_ 288
+_О 288
+али 286
+же_ 285
+Ñп 285
+иÑÑ‚ 285
+ных_ 285
+з_ 283
+_М 283
diff --git a/libtextcat/data/new_fingerprints/lm/sanskrit.lm b/libtextcat/data/new_fingerprints/lm/sanskrit.lm
new file mode 100644
index 000000000000..e21b8712111d
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/sanskrit.lm
@@ -0,0 +1,400 @@
+a 15017
+_ 14975
+h 5028
+i 3820
+t 2976
+s 2788
+r 2599
+| 2437
+n 2432
+aa 2276
+ha 2007
+m 1982
+a_ 1802
+v 1799
+d 1768
+u 1629
+y 1599
+_| 1470
+|_ 1470
+e 1403
+k 1371
+sh 1330
+ra 1243
+p 1213
+va 1152
+A 1134
+ya 1120
+ma 1097
+na 1091
+ar 1080
+ta 1054
+M 1050
+. 1047
+am 1037
+an 1006
+|| 967
+||_ 965
+_|| 965
+_||_ 963
+at 962
+M_ 946
+as 910
+_s 882
+o 848
+b 844
+i_ 827
+aM 764
+aM_ 685
+c 630
+ch 629
+sa 623
+N 616
+ad 612
+H 601
+pa 595
+H_ 584
+g 580
+_n 579
+l 554
+bh 552
+hi 547
+ka 542
+it 536
+ii 535
+ama 519
+_|_ 505
+e_ 477
+_p 475
+dh 475
+av 469
+ak 445
+aH 444
+da 440
+aH_ 439
+ay 437
+j 437
+_na 432
+ana 430
+hh 428
+ti 426
+ara 425
+aa_ 410
+_k 394
+shh 389
+_v 388
+_sa 381
+.h 379
+ah 369
+_b 368
+h_ 363
+.h_ 363
+cha 362
+haa 361
+_t 358
+ri 352
+sha 345
+ap 333
+vi 330
+is 329
+^ 321
+o_ 321
+_m 320
+ai 311
+_d 311
+la 302
+na_ 301
+.n 298
+ava 295
+al 294
+_sh 291
+ja 288
+a. 280
+aan 277
+ish 274
+aN 273
+aya 273
+ash 266
+ha_ 265
+ga 264
+st 255
+ni 255
+ii_ 254
+hu 253
+Na 253
+R 248
+R^ 248
+^i 247
+R^i 247
+a.n 243
+th 241
+_c 240
+_ch 239
+maa 238
+bha 237
+vaa 233
+ab 228
+ir 226
+\ 226
+ita 223
+uu 222
+dha 220
+har 218
+_a 216
+_bh 216
+nam 212
+u_ 212
+he 212
+m.h_ 211
+m. 211
+m.h 211
+ur 210
+es 209
+ata 208
+te 206
+yaa 205
+_ma 204
+esh 202
+aka 200
+id 199
+pr 199
+aha 198
+hy 198
+T 197
+aat 197
+_OM_ 196
+OM 196
+_O 196
+_OM 196
+OM_ 196
+O 196
+ti_ 195
+ari 194
+raa 193
+ag 192
+_y 192
+aas 190
+_ta 190
+_j 189
+I 189
+_na_ 187
+am.h_ 185
+am.h 185
+am. 185
+_pa 183
+iv 182
+de 182
+ada 178
+nd 178
+_cha 177
+_h 176
+ati 175
+taa 173
+ev 172
+nt 171
+rii 171
+ishh 170
+ya_ 168
+_vi 166
+ast 165
+tr 164
+abh 164
+kh 162
+ala 160
+tha 160
+apa 160
+asa 158
+naa 158
+_nam 156
+ru 156
+A_ 155
+_ka 154
+aar 153
+_pr 152
+_g 151
+pra 150
+ham 150
+hha 149
+aana 149
+di 149
+ra_ 147
+ik 146
+.a 144
+yat 143
+ks 143
+hA 143
+hya 143
+ksh 143
+ut 142
+sy 141
+nama 140
+_va 140
+.\ 140
+paa 140
+han 139
+eva 138
+U 138
+mi 138
+_r 136
+_ja 136
+asy 135
+hr 135
+sya 134
+cha_ 132
+rv 132
+tv 130
+asya 130
+kar 130
+ho 129
+yo 129
+in 128
+adh 127
+yA 127
+va_ 126
+su 125
+_ya 125
+shha 124
+pu 124
+R^it 123
+sta 123
+mu 123
+^it 123
+ty 123
+_nama 121
+ac 120
+rii_ 120
+ach 120
+aNa 119
+tas 119
+shi 119
+iva 119
+hav 119
+tra 118
+var 118
+par 118
+haM 117
+aad 117
+kaa 117
+hch 117
+mas 117
+ai_ 117
+hc 117
+sar 116
+aam 116
+_bha 115
+_pra 114
+et 114
+haM_ 113
+aay 113
+aj 113
+ye 113
+o. 113
+An 111
+arii 111
+t.h 110
+t. 110
+ath 110
+t.h_ 109
+man 109
+te_ 108
+o.a 108
+hara 108
+rA 108
+rva 108
+tva 108
+asta 108
+up 108
+shr 107
+daa 104
+me 104
+dr 104
+ram 104
+arii_ 102
+_ni 102
+arv 102
+iH 102
+hit 101
+ras 101
+aga 101
+Am 101
+mA 101
+ba 101
+amas 100
+tu 100
+yaM 100
+ant 99
+ud 99
+uk 98
+iH_ 98
+yaM_ 98
+kha 98
+au 98
+ira 97
+shhT 97
+rah 97
+hT 97
+hhT 97
+D 96
+_sar 96
+re 96
+eshh 95
+sarv 94
+amaa 94
+and 94
+arva 94
+_ra 93
+_dh 93
+tt 92
+tad 92
+hm 92
+raM_ 91
+dev 91
+raM 91
+C 91
+ani 91
+_sarv 90
+atha 89
+Ad 89
+chi 89
+tA 88
+sarva 88
+avi 88
+taM 87
+hava 87
+anaa 86
+vA 86
+dd 86
+nA 85
+Ar 85
+hv 85
+taM_ 85
+dhi 84
+ksha 84
+ip 84
+ma_ 84
+_sha 84
+ati_ 83
+yai 83
+vat 83
+At 83
+kR 82
+kR^ 82
+bhi 82
+_shr 82
+to 82
+ta_ 82
+br 82
+ek 82
+kR^i 82
+tat 81
+nta 81
+hma 81
+aaya 80
+tam 80
+en 80
+us 79
+bra 79
+ke 79
+kt 79
+ddh 79
+mo 79
diff --git a/libtextcat/data/new_fingerprints/lm/scots.lm b/libtextcat/data/new_fingerprints/lm/scots.lm
new file mode 100644
index 000000000000..7aac457075f6
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/scots.lm
@@ -0,0 +1,400 @@
+_ 11688
+e 3223
+a 2469
+t 2269
+i 1928
+n 1903
+r 1414
+o 1406
+h 1369
+s 1249
+l 929
+n_ 890
+_t 862
+_a 843
+d 818
+e_ 798
+th 704
+w 661
+he 625
+an 612
+t_ 606
+u 592
+_th 575
+c 508
+s_ 471
+the 470
+, 469
+- 458
+in 455
+m 445
+,_ 440
+b 434
+g 429
+er 409
+ee 408
+_the 407
+k 402
+an_ 402
+f 385
+_w 378
+he_ 376
+the_ 364
+_an 362
+_o 360
+y 358
+_the_ 354
+_s 353
+_an_ 342
+a_ 335
+r_ 327
+_b 316
+d_ 303
+i_ 278
+en 277
+p 270
+ei 245
+A 236
+wa 232
+_A 231
+re 229
+in_ 229
+ui 218
+oo 217
+le 217
+ai 216
+et 212
+ti 209
+it 209
+_f 206
+te 204
+_a_ 203
+_m 202
+ha 200
+as 193
+on 188
+at 184
+_i 183
+_wa 183
+_c 182
+o_ 180
+or 178
+_h 176
+_g 169
+ch 165
+A_ 159
+_l 158
+_A_ 157
+st 156
+_d 155
+_ti 148
+. 148
+._ 146
+ke 144
+ti_ 143
+-- 143
+_o_ 142
+ow 142
+--- 140
+ed 138
+---- 138
+_r 137
+as_ 137
+y_ 136
+er_ 136
+----- 136
+ir 135
+aa 135
+la 131
+een 130
+ae 129
+_ti_ 128
+ra 126
+es 125
+nd 124
+de 120
+h_ 120
+ie 120
+ar 119
+ll 119
+nt 118
+ot 118
+en_ 115
+ma 115
+eet 113
+her 112
+el 112
+is 112
+' 112
+at_ 111
+ic 109
+se 108
+or_ 106
+wu 104
+me 104
+ne 103
+fo 102
+on_ 101
+was 99
+_was 98
+et_ 98
+ri 98
+_e 97
+_ma 97
+v 97
+_n 97
+! 97
+li 97
+ht 93
+hi 92
+_wu 92
+ng 91
+ro 91
+it_ 90
+ck 90
+_fo 90
+tha 90
+k_ 89
+il 89
+cht 86
+eet_ 86
+_p 86
+we 86
+_was_ 85
+was_ 85
+rt 84
+ed_ 83
+ter 83
+id 83
+ga 82
+; 82
+;_ 81
+ther 79
+tt 76
+air 76
+e, 75
+un 75
+ho 75
+for 74
+ge 74
+_st 73
+_y 72
+_he 72
+wh 71
+_on 71
+sh 70
+z 70
+e,_ 69
+bi 68
+_tha 68
+wui 67
+!_ 67
+ad 67
+een_ 66
+l_ 66
+ts 66
+_for 66
+n, 66
+_wh 65
+re_ 65
+be 65
+eh 64
+hat 64
+ns 64
+br 64
+g_ 64
+ui_ 64
+rr 64
+wui_ 63
+ni 63
+_wui 62
+ay 62
+s, 62
+pe 61
+n,_ 61
+bo 61
+al 61
+ye 61
+_bi 60
+oot 60
+na 60
+ang 60
+s,_ 59
+es_ 59
+ill 58
+that 58
+_wui_ 58
+nn 58
+eh_ 58
+oa 57
+han 57
+_that 56
+_br 56
+ca 56
+_ga 56
+ng_ 56
+um 55
+hat_ 55
+oon 55
+od 55
+for_ 55
+no 55
+ree 55
+_for_ 54
+_le 54
+ht_ 54
+ot_ 54
+_k 53
+rd 53
+ki 53
+aw 53
+nd_ 53
+_on_ 53
+_it 53
+ik 53
+t, 53
+_be 52
+that_ 52
+ve 52
+rn 52
+'s 51
+au 51
+co 51
+ich 51
+to 51
+lo 51
+t,_ 51
+ea 51
+tee 51
+lan 50
+fi 50
+_at 50
+am 50
+_in 50
+ere 50
+ur 50
+le_ 50
+nt_ 49
+'s_ 49
+hin 49
+yi 49
+hr 49
+ts_ 49
+_ca 48
+" 48
+ta 48
+cht_ 48
+-_ 48
+_as 47
+T 47
+ang_ 47
+lei 46
+_ma_ 46
+tr 46
+_ro 46
+fe 46
+ma_ 46
+icht 46
+_as_ 46
+der 46
+cl 46
+e- 45
+n- 45
+thr 45
+ba 45
+m_ 45
+st_ 45
+rt_ 45
+_u 45
+do 45
+_T 45
+im 44
+_se 44
+sk 44
+_la 44
+eik 44
+bit 43
+ike 43
+B 43
+kee 43
+tte 43
+di 43
+eed 43
+_B 42
+_aa 42
+her_ 42
+da 42
+ff 42
+tu 42
+ie_ 42
+_cl 42
+_ba 42
+oot_ 42
+bu 41
+eike 41
+oc 41
+hu 41
+_thr 41
+ther_ 41
+_co 41
+aa_ 41
+so 41
+_me 41
+H 41
+_H 40
+ke_ 40
+ert 40
+lu 40
+ist 40
+si 40
+iz 40
+ar_ 39
+uc 39
+thi 39
+ad_ 39
+ru 39
+owe 39
+gi 38
+_bit 38
+_do 38
+int 38
+bl 38
+ld 38
+_at_ 38
+lt 38
+ac 38
+_ha 38
+ae_ 38
+rs 37
+here 37
+ei_ 37
+han_ 37
+p_ 37
+is_ 37
+eth 37
+fa 37
+_sk 37
+ll_ 37
+ss 36
+bra 36
+wha 36
+gl 36
+ck_ 36
+pl 36
+lin 36
+ir_ 36
+ab 36
+_ther 36
+_da 35
+ce 35
+rin 35
+_oo 35
+rl 35
+wee 35
+and 35
+sa 35
+_yi 35
+_bra 35
+'d 35
+ds 35
+_bo 35
diff --git a/libtextcat/data/new_fingerprints/lm/scots_gaelic.lm b/libtextcat/data/new_fingerprints/lm/scots_gaelic.lm
new file mode 100644
index 000000000000..491862a8c9d2
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/scots_gaelic.lm
@@ -0,0 +1,400 @@
+_ 12634
+a 5353
+h 3268
+i 2898
+n 2792
+e 1651
+r 1563
+d 1455
+_a 1425
+c 1245
+n_ 1236
+s 1165
+l 1152
+an 1121
+t 980
+ai 979
+g 962
+u 905
+ch 902
+ha 836
+h_ 833
+a_ 829
+ea 821
+o 794
+dh 726
+an_ 711
+b 639
+m 585
+na 514
+nn 506
+ac 498
+r_ 495
+s_ 482
+ir 480
+ach 466
+id 458
+_an 450
+_c 427
+th 403
+à 388
+he 383
+in 379
+bh 367
+idh 358
+ad 342
+_n 341
+il 332
+nn_ 323
+_t 322
+_d 319
+ar 317
+e_ 311
+dh_ 307
+_an_ 303
+_b 302
+_na 294
+air 289
+ig 279
+. 275
+ir_ 272
+ag 272
+_ai 272
+, 270
+gu 269
+,_ 269
+._ 265
+ean 264
+ch_ 261
+f 259
+? 258
+_s 255
+ann 250
+ra 241
+ei 241
+_a_ 241
+ha_ 241
+d_ 238
+- 235
+_m 231
+gh 230
+hea 228
+le 226
+_f 224
+ui 223
+is 223
+as 218
+adh 218
+l_ 216
+g_ 208
+ài 207
+ò 207
+hai 205
+cha 205
+air_ 204
+na_ 201
+inn 198
+tha 190
+C 189
+G 188
+ann_ 187
+_ag 186
+_air 186
+eac 185
+_g 185
+_na_ 184
+ach_ 184
+_C 183
+us 183
+_ch 183
+la 182
+_G 182
+each 181
+us_ 178
+al 178
+gus 176
+gus_ 176
+_th 169
+_air_ 168
+_agus 167
+agus_ 167
+_agu 167
+agus 167
+agu 167
+ta 164
+aid 163
+hi 163
+hd 163
+chd 160
+T 157
+A 156
+ic 152
+_T 152
+adh_ 150
+idh_ 148
+mh 147
+?_ 146
+ar_ 145
+oi 144
+da 143
+aidh 143
+_bh 139
+ean_ 138
+sa 138
+ig_ 138
+_r 136
+_A 134
+ì 134
+te 134
+achd 131
+hu 131
+_e 130
+aig 130
+_l 130
+_ann 129
+ain 127
+ne 127
+dhe 125
+_dh 125
+àid 123
+o_ 121
+hl 119
+acha 119
+ga 118
+àidh 118
+on 118
+it 117
+aidh_ 116
+de 115
+nan 115
+ua 115
+_ann_ 115
+ich 115
+il_ 114
+m_ 114
+eil 114
+ri 112
+at 112
+ma 111
+li 109
+ao 109
+re 109
+inn_ 108
+_tha 107
+fh 106
+as_ 106
+bh_ 106
+nan_ 103
+lea 103
+lt 103
+S 103
+a? 103
+a?_ 102
+io 102
+E 101
+am 101
+' 100
+_a? 100
+igh 100
+_a?_ 99
+_gu 99
+idhe 99
+t_ 99
+se 99
+si 98
+ba 97
+ù 97
+tha_ 96
+bha 95
+B 94
+is_ 94
+u_ 94
+_B 94
+_i 93
+ile 92
+aic 91
+hei 91
+ia 90
+ho 89
+Th 88
+ath 88
+_Th 88
+rt 87
+ib 87
+Gàid 86
+_Gài 86
+_Gà 86
+Gài 86
+òr 86
+Gà 86
+Gàidh 86
+_Gàid 86
+had 85
+ibh 85
+_fh 85
+p 84
+ad_ 83
+_? 83
+_E 83
+hd_ 82
+dhea 82
+chd_ 82
+ear 81
+ith 81
+_tha_ 80
+h- 79
+eal 78
+hean 78
+sg 77
+rea 77
+_S 76
+ais 75
+ll 75
+han 74
+hà 74
+achd_ 74
+ead 74
+idhea 73
+am_ 72
+dha 72
+_nan 71
+_nan_ 71
+hadh 71
+gh_ 71
+ail 70
+hui 70
+Ch 69
+eachd 69
+h. 69
+aich 69
+hli 69
+chai 69
+om 68
+fa 68
+chad 68
+I 67
+h._ 67
+_Ch 67
+tea 67
+nea 66
+chadh 66
+achad 66
+rai 66
+lig 66
+haid 66
+dea 66
+rt_ 65
+àr 65
+dhl 65
+ana 64
+eann 64
+Ei 64
+le_ 64
+hn 64
+ilt 64
+uid 64
+_fa 63
+_Tha 63
+Tha 63
+ob 63
+_si 62
+ro 62
+cu 62
+ainn 62
+un 62
+dhli 61
+idhli 61
+lean 61
+idhl 61
+àidhl 61
+hlig 61
+dhlig 61
+in_ 60
+_à 60
+st 60
+rr 60
+_cu 60
+hr 60
+_aig 60
+bhe 59
+i_ 59
+aigh 59
+Tha_ 59
+è 59
+_ri 59
+_Tha_ 59
+lb 58
+che 58
+ran 58
+nac 58
+haidh 58
+hadh_ 58
+aig_ 58
+Gh 58
+ilea 58
+_Gh 58
+lte 58
+_le 58
+ru 58
+àidhe 57
+_I 57
+ilte 57
+eadh 57
+M 56
+hlig_ 56
+L 56
+chu 56
+nach 56
+_ma 56
+lig_ 56
+h,_ 55
+th_ 55
+ibh_ 55
+_aig_ 55
+D 55
+atha 55
+_Ei 55
+h, 55
+gu_ 54
+_gu_ 54
+im 54
+eil_ 54
+eu 53
+_M 53
+Al 53
+irt 53
+_L 53
+iad 53
+sea 52
+lba 52
+Alba 52
+F 52
+Alb 52
+uai 52
+ich_ 52
+_F 51
+ilean 51
+has 51
+tai 51
+each_ 50
+eacha 50
+har 50
+ni 50
+_de 50
+irt_ 50
+n,_ 50
+mha 50
+n, 50
+_e_ 50
+ide 49
+neach 49
+neac 49
+ur 49
+rd 49
+_h 49
+hean_ 49
+oc 49
+eò 49
+te_ 49
+han_ 49
+on_ 49
diff --git a/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm b/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm
new file mode 100644
index 000000000000..9471be6eabd5
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/serbian_ascii.lm
@@ -0,0 +1,400 @@
+_ 34122
+a 9113
+o 8135
+i 7736
+e 7535
+n 5207
+s 4860
+j 3995
+t 3797
+r 3660
+u 3224
+l 3065
+d 3061
+e_ 2941
+v 2786
+a_ 2746
+k 2701
+m 2492
+o_ 2328
+y 2238
+p 2151
+_s 2148
+i_ 2094
+je 1887
+c 1854
+z 1731
+_n 1437
+_p 1432
+g 1418
+b 1368
+u_ 1333
+je_ 1237
+, 1226
+,_ 1214
+_i 1130
+st 1105
+na 1076
+. 1056
+_d 1033
+._ 1030
+_j 967
+ra 934
+ko 908
+ni 900
+cy 893
+sy 875
+_je 871
+_o 824
+ta 799
+no 780
+_u 777
+re 766
+_b 764
+_k 763
+da 760
+ne 754
+li 750
+ti 745
+se 722
+po 713
+to 713
+_je_ 696
+an 688
+ja 683
+pr 665
+va 651
+lo 634
+_z 626
+m_ 625
+is 625
+il 622
+ov 621
+la 621
+_m 615
+bi 604
+_t 603
+_po 594
+en 586
+_se 578
+os 578
+in 576
+od 576
+ka 552
+ve 548
+ij 538
+_pr 536
+al 536
+vo 535
+om 530
+_i_ 525
+nj 515
+ed 509
+_na 507
+na_ 503
+og 499
+oj 498
+ma 493
+_bi 492
+on 489
+ak 482
+im 481
+ye 481
+ro 480
+vi 473
+sa 469
+ri 464
+da_ 451
+av 450
+at 449
+se_ 447
+es 446
+h 443
+ao 441
+ji 437
+yi 436
+_da 433
+ad 432
+_se_ 430
+lj 428
+zy 426
+za 426
+_ne 425
+de 422
+tr 417
+cj 415
+_u_ 414
+_c 412
+le 402
+_v 397
+ar 390
+_g 390
+ic 384
+n_ 382
+ju 379
+lo_ 377
+aj 376
+_ko 369
+ao_ 366
+ek 361
+_da_ 359
+et 356
+go 354
+iz 346
+_za 345
+_r 344
+or 342
+mo 341
+el 340
+as 339
+ik 336
+te 332
+_sa 329
+d_ 323
+am 320
+me 318
+sto 317
+di 315
+ec 311
+ol 310
+a,_ 307
+a, 307
+_ni 302
+ya 296
+do 295
+yt 294
+su 292
+syt 289
+li_ 288
+sta 286
+ije 284
+ko_ 277
+ti_ 277
+la_ 277
+ga 276
+bil 275
+no_ 274
+a. 273
+nu 272
+a._ 271
+ne_ 271
+om_ 268
+_cy 266
+_na_ 263
+_bil 263
+sv 263
+ru 259
+to_ 256
+_od 253
+cyi 253
+nje 251
+it 251
+pa 250
+az 248
+e,_ 245
+e, 245
+ob 244
+dn 243
+ac 242
+ost 242
+k_ 240
+iv 239
+io 238
+_su 238
+_iz 237
+ilo 235
+_sv 234
+_ka 233
+koj 231
+mi 229
+im_ 229
+ije_ 227
+g_ 226
+em 223
+su_ 223
+ih 223
+ji_ 221
+kr 220
+ut 220
+_koj 220
+V 218
+_st 218
+ye_ 217
+_l 214
+_V 213
+ovo 211
+j_ 210
+uc 208
+ja_ 208
+h_ 207
+nij 206
+sk 206
+ot 203
+io_ 203
+gl 203
+_do 201
+ok 200
+ns 199
+ilo_ 199
+er 197
+ih_ 195
+pre 193
+ci 193
+og_ 193
+ki 192
+sl 191
+t_ 189
+ni_ 189
+_a 189
+vr 188
+ati 187
+_su_ 186
+nije 181
+pro 181
+be 180
+yn 179
+cye 178
+ju_ 178
+ku 177
+isy 177
+ta_ 174
+sye 172
+_tr 172
+O 172
+jen 172
+_to 171
+pi 168
+_pre 168
+S 168
+ima 167
+nije_ 167
+_mo 166
+eg 166
+e._ 164
+za_ 164
+e. 164
+_pro 164
+gov 163
+N 162
+dr 162
+ako 162
+tv 162
+_S 160
+P 159
+ma_ 159
+_on 159
+sp 158
+nst 158
+anj 158
+dj 157
+oc 157
+_sy 156
+ev 155
+ce 155
+lik 154
+_nij 153
+_N 152
+ist 151
+_P 151
+_nije 151
+- 151
+ba 150
+jed 150
+sti 150
+ova 149
+_is 148
+id 148
+ton 148
+ke 147
+pos 147
+od_ 147
+osy 146
+Vi 146
+ila 145
+ins 145
+bo 145
+_Vi 145
+ir 144
+_za_ 144
+oz 144
+ecj 144
+cje 143
+on_ 143
+zn 142
+_O 141
+us 141
+i, 141
+i,_ 141
+mu 140
+inst 140
+cya 140
+oji 139
+esy 139
+icy 139
+lja 138
+_go 138
+i. 138
+_re 137
+_bilo 137
+edn 137
+acy 137
+rat 137
+bilo 137
+ali 136
+ecy 136
+ija 135
+pri 135
+ad_ 135
+lic 135
+i._ 135
+Vins 134
+Vin 134
+ston 134
+Vinst 134
+ga_ 134
+nston 134
+insto 134
+nsto 134
+_Vins 133
+_Vin 133
+zi 132
+ran 131
+le_ 130
+ili 130
+bilo_ 130
+_pos 129
+ila_ 129
+est 128
+_ve 128
+tre 128
+zye 127
+_nj 127
+si 126
+f 126
+alo 125
+ako_ 125
+tra 125
+sa_ 125
+pu 124
+ud 124
+z_ 124
+_ra 124
+iti 124
+_de 124
+odi 123
+T 123
+-_ 122
+o,_ 121
+o, 121
+du 121
+rs 121
+B 120
+ka_ 119
+red 119
+_od_ 118
+an_ 118
+nu_ 118
+iko 117
+dno 117
+_pa 117
+s_ 116
diff --git a/libtextcat/data/new_fingerprints/lm/slovak_ascii.lm b/libtextcat/data/new_fingerprints/lm/slovak_ascii.lm
new file mode 100644
index 000000000000..29c8736b3ba3
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/slovak_ascii.lm
@@ -0,0 +1,400 @@
+_ 20064
+a 4991
+o 4983
+e 3838
+n 3342
+i 3317
+r 2583
+s 2501
+v 2383
+t 2325
+c 1918
+k 1912
+l 1888
+d 1736
+u 1725
+p 1543
+a_ 1527
+y 1371
+m 1339
+z 1227
+h 1194
+e_ 956
+_p 881
+_s 828
+o_ 814
+na 809
+b 808
+_v 798
+j 797
+. 796
+ov 795
+._ 785
+st 687
+i_ 665
+, 657
+,_ 657
+_n 625
+ch 623
+u_ 618
+ro 617
+po 598
+_o 546
+ne 529
+en 520
+v_ 519
+_a 516
+ra 506
+pr 488
+y_ 481
+od 476
+_z 467
+ie 462
+ni 448
+an 447
+vy 434
+to 433
+h_ 431
+_na 429
+re 416
+ch_ 404
+ho 401
+al 399
+ci 394
+va 387
+na_ 386
+_pr 385
+_d 384
+_k 378
+la 377
+ko 375
+do 374
+_po 373
+si 353
+_t 346
+_r 337
+os 336
+no 334
+in 328
+tr 324
+om 321
+_v_ 320
+ny 319
+m_ 319
+ri 317
+S 306
+ac 302
+sa 300
+ti 300
+_m 298
+za 298
+er 291
+ia 290
+ce 290
+li 289
+yc 286
+ych 285
+ed 284
+at 281
+ob 281
+ak 280
+_na_ 280
+il 279
+_, 279
+_,_ 279
+ok 277
+sk 268
+ych_ 265
+_c 262
+mi 261
+ol 260
+me 260
+l_ 259
+t_ 259
+ku 258
+ta 256
+le 256
+_b 254
+or 252
+_a_ 250
+lo 247
+oc 246
+vo 246
+es 244
+ve 242
+_vy 240
+on 238
+_sa 231
+as 231
+da 230
+aj 228
+av 218
+el 216
+ova 216
+ic 215
+ne_ 209
+_do 208
+sa_ 207
+ka 205
+_sa_ 204
+te 203
+j_ 201
+_ro 199
+P 198
+_za 196
+ky 196
+_S 195
+je 194
+ar 193
+_. 193
+it 192
+s_ 192
+em 191
+ej 191
+ur 190
+ad 189
+_o_ 187
+_._ 187
+ov_ 185
+de 180
+_% 179
+om_ 179
+% 179
+_u 174
+pre 173
+dn 173
+D 172
+rok 170
+ie_ 170
+sp 169
+pri 167
+_pre 167
+am 165
+- 164
+ke 164
+eh 162
+oz 161
+k_ 160
+ost 160
+_j 156
+f 155
+zn 153
+g 152
+kt 152
+ho_ 151
+eho 151
+hod 150
+ku_ 148
+is 148
+zi 147
+cn 147
+eho_ 146
+ej_ 145
+az 145
+tu 145
+_pri 144
+cen 144
+_st 143
+ma 142
+ast 141
+_ce 140
+rov 140
+la_ 138
+ot 138
+nych 135
+nyc 135
+_ob 133
+z_ 133
+nych_ 131
+N 130
+li_ 129
+ani 129
+co 128
+nt 128
+ny_ 127
+E 125
+_ne 124
+) 123
+_( 123
+sti 123
+A 123
+( 123
+cho 122
+vi 122
+_sp 122
+di 120
+pa 120
+n_ 119
+ju 118
+ys 117
+bo 117
+_P 116
+_tr 115
+V 114
+je_ 114
+ln 114
+_i 113
+ze 113
+spo 112
+_N 112
+nd 111
+nu 111
+so 111
+red 110
+vn 110
+kl 110
+kov 110
+_cen 110
+_rok 109
+tn 109
+du 109
+nc 109
+ap 109
+d_ 108
+van 108
+ca 108
+M 108
+chod 107
+ti_ 107
+U 106
+_ak 106
+ru 105
+sta 105
+ym 105
+_- 104
+et 103
+_h 102
+est 102
+_je 102
+nos 101
+aci 101
+us 100
+dov 100
+pod 100
+_to 100
+tor 99
+uc 99
+ras 98
+ky_ 98
+_s_ 98
+_mi 97
+* 97
+uj 97
+nost 97
+vys 97
+ovy 97
+ez 97
+oku 96
+_V 96
+op 96
+bc 96
+rast 96
+se 95
+B 95
+roku 95
+kto 94
+ove 94
+by 94
+-_ 94
+_ko 93
+obc 92
+nie 91
+ia_ 91
+ka_ 91
+*_ 90
+ali 90
+lo_ 89
+ovan 89
+to_ 88
+iz 88
+_bo 88
+_l 88
+odo 87
+bch 87
+bchod 87
+bcho 87
+sl 86
+st_ 86
+pred 86
+C 86
+pol 85
+_pred 85
+R 85
+ik 84
+uro 84
+pi 84
+ek 84
+zo 83
+eni 83
+obch 83
+cie 83
+oku_ 83
+obcho 83
+ns 83
+roku_ 82
+ii 82
+tv 82
+ba 82
+ent 82
+_spo 81
+tov 81
+pe 81
+kon 80
+kc 80
+ec 80
+kci 80
+ck 80
+x 79
+osti 79
+_Sk 79
+mi_ 79
+_in 79
+Sk 79
+sia 79
+br 78
+rh 78
+val 78
+olo 77
+_pod 77
+%_ 77
+_%_ 77
+bu 77
+_f 77
+iv 77
+_obc 77
+_obch 77
+eb 76
+str 76
+nej 76
+_D 76
+ni_ 75
+ou 75
+im 75
+ena 74
+tre 74
+_A 74
+mo 74
+su 74
+rz 73
+_trh 73
+trh 73
+_U 73
+al_ 73
+_ra 73
+_e 72
+_C 72
+sti_ 72
+zv 72
+te_ 72
+cno 72
+oj 72
+ktor 71
+_roku 71
+ocn 71
+ina 71
+sil 71
+nov 71
+alo 71
+odn 70
+nan 70
+oh 70
diff --git a/libtextcat/data/new_fingerprints/lm/slovenian.lm b/libtextcat/data/new_fingerprints/lm/slovenian.lm
new file mode 100644
index 000000000000..0fb3f18f1659
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/slovenian.lm
@@ -0,0 +1,400 @@
+_ 10406
+a 2828
+e 2676
+i 2458
+o 2418
+n 1814
+r 1484
+v 1253
+l 1248
+s 1228
+t 1172
+j 1107
+d 1085
+k 911
+p 880
+a_ 823
+m 763
+i_ 681
+e_ 678
+_p 603
+o_ 566
+u 521
+z 516
+b 456
+_s 435
+je 434
+, 416
+,_ 411
+ni 399
+Ä 383
+_v 372
+_d 356
+pr 355
+g 345
+ra 336
+_n 332
+st 323
+an 313
+po 303
+re 301
+na 295
+h 287
+ov 276
+_pr 276
+li 275
+al 274
+_z 270
+je_ 259
+la 255
+Å¡ 253
+ne 248
+en 246
+ko 244
+in 237
+c 234
+ti 234
+v_ 234
+_po 232
+no 230
+ve 230
+_k 227
+_i 224
+da 224
+. 221
+_j 221
+ri 220
+ja 216
+_t 214
+se 213
+ed 212
+._ 211
+em 206
+te 205
+za 201
+od 201
+av 200
+lo 196
+nj 194
+_o 194
+_je 193
+il 190
+or 183
+ka 181
+sk 179
+_b 178
+_je_ 178
+ih 178
+n_ 177
+_za 173
+h_ 171
+er 171
+os 171
+_na 168
+va 168
+ta 164
+le 163
+m_ 161
+ev 157
+ij 157
+ar 157
+do 155
+to 155
+ž 154
+A 153
+el 150
+_m 148
+ro 147
+ol 146
+_v_ 145
+aj 145
+di 143
+N 142
+S 142
+at 140
+ih_ 139
+ki 138
+de 137
+_in 135
+vo 135
+ga 134
+me 131
+in_ 129
+vi 129
+om 127
+_in_ 125
+et 124
+pre 124
+O 123
+bi 120
+I 119
+da_ 117
+ik 117
+ma 115
+E 114
+so 113
+bo 112
+it 112
+anj 112
+eg 110
+ni_ 109
+mi 108
+ke 108
+na_ 108
+u_ 108
+lj 106
+iz 105
+ob 105
+_da 103
+li_ 103
+is 103
+im 102
+red 102
+_pre 102
+dr 100
+mo 99
+P 99
+_se 99
+ji 98
+r_ 97
+ad 97
+pri 97
+K 97
+_l 97
+tr 95
+pa 94
+no_ 94
+j_ 92
+ki_ 91
+ti_ 91
+_pri 91
+dn 89
+_P 88
+ej 88
+_da_ 87
+ne_ 86
+ega 86
+_r 86
+_bi 86
+l_ 86
+em_ 86
+go 86
+" 85
+sl 85
+ek 84
+ali 84
+ove 84
+aÄ 84
+ak 84
+ci 83
+ga_ 83
+ko_ 83
+se_ 82
+_S 82
+jo 81
+ot 81
+ja_ 81
+_so 80
+lov 80
+L 80
+D 79
+V 79
+as 78
+_do 78
+am 78
+nje 77
+es 77
+za_ 77
+_pa 76
+T 75
+tu 75
+_za_ 74
+sti 74
+_dr 74
+la_ 74
+_N 74
+_de 74
+ega_ 73
+_ko 73
+og 73
+ns 72
+Äe 72
+ds 72
+_bo 71
+ora 71
+vn 71
+ost 71
+_ne 71
+iÄ 70
+ven 69
+z_ 69
+Äi 69
+_te 68
+ce 68
+_se_ 67
+Äa 67
+oÄ 67
+M 66
+_u 66
+un 65
+ln 65
+pos 64
+ju 64
+sta 64
+op 64
+di_ 63
+ud 63
+vs 63
+t_ 62
+nsk 62
+tv 62
+on 62
+ski 62
+R 62
+pa_ 62
+_ka 62
+i, 61
+so_ 61
+_iz 60
+_pa_ 60
+s_ 60
+i,_ 60
+pro 59
+del 59
+rav 59
+eni 59
+oli 58
+rj 58
+e, 57
+Å¡e 57
+ili 57
+vr 57
+d_ 57
+_le 57
+pred 57
+jo_ 56
+e,_ 56
+nik 56
+love 56
+_pred 56
+ske 56
+er_ 55
+str 55
+Än 54
+pra 54
+J 54
+_Å¡ 54
+oven 53
+_ra 53
+tn 53
+_na_ 53
+_so_ 53
+nih 53
+loven 53
+si 52
+ke_ 52
+_g 52
+ic 52
+udi 51
+bi_ 51
+eds 51
+oj 51
+ru 51
+a, 51
+_pro 50
+_pos 50
+nc 50
+nih_ 50
+Äu 50
+a,_ 50
+_a 50
+az 50
+ok 50
+B 50
+let 49
+udi_ 49
+_od 49
+_K 49
+aj_ 48
+_bi_ 48
+_ve 48
+raÄ 48
+o, 47
+_tu 47
+ija 47
+ter 47
+ist 47
+Z 47
+reds 46
+nd 46
+ali_ 46
+A_ 46
+iti 46
+bil 46
+_ob 46
+o,_ 46
+ati 46
+tud 45
+tudi 45
+_ki 45
+k_ 45
+be 45
+aš 45
+ir 45
+ža 45
+do_ 45
+sp 45
+_ki_ 45
+_st 45
+ep 44
+_del 44
+tudi_ 44
+rž 44
+aÄu 44
+_ni 44
+ah 43
+raÄu 43
+raÄun 43
+iš 43
+_mo 43
+avn 43
+_tud 43
+Äun 43
+aÄun 43
+_tudi 43
+_to 42
+raz 42
+kr 42
+ova 42
+_e 42
+ogo 42
+ani 42
+_" 42
+ev_ 42
+br 42
+eb 42
+sa 42
+mi_ 42
+tem 42
+ta_ 41
+prav 41
+i. 41
+slov 41
+ens 41
+bo_ 41
+že 41
+_T 41
+_let 41
+odo 41
+slo 41
+ensk 40
+ka_ 40
+neg 40
+ez 40
+nos 40
+eÄ 40
+_sl 40
+_V 40
+rža 40
+nega 40
+ili_ 39
diff --git a/libtextcat/data/new_fingerprints/lm/spanish.lm b/libtextcat/data/new_fingerprints/lm/spanish.lm
new file mode 100644
index 000000000000..e40317f956a9
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/spanish.lm
@@ -0,0 +1,400 @@
+_ 25044
+e 7830
+a 7437
+o 5102
+s 4394
+n 4358
+i 4065
+r 3998
+l 3634
+d 3118
+c 2931
+t 2834
+u 2316
+a_ 2269
+e_ 2211
+s_ 1862
+de 1679
+p 1673
+_d 1644
+m 1447
+_de 1443
+n_ 1332
+o_ 1301
+en 1295
+_e 1216
+es 1177
+_l 1132
+de_ 1080
+la 1060
+os 1028
+_de_ 1027
+_p 963
+l_ 910
+ci 890
+_c 866
+_a 866
+os_ 801
+ar 777
+er 775
+as 768
+ra 746
+nt 736
+_la 727
+re 726
+,_ 724
+, 724
+el 722
+ta 708
+ue 701
+g 678
+on 674
+al 670
+_s 666
+co 653
+b 637
+an 622
+v 616
+la_ 616
+or 612
+te 599
+st 596
+el_ 580
+_la_ 573
+y 545
+to 543
+r_ 517
+ad 512
+ó 511
+do 504
+ro 504
+se 488
+as_ 488
+q 487
+qu 487
+. 479
+._ 478
+en_ 475
+ca 460
+in 459
+un 456
+_co 450
+es_ 449
+ic 449
+_en 440
+ac 440
+que 439
+na 439
+lo 430
+_m 430
+f 429
+ent 428
+da 412
+ue_ 411
+po 405
+le 399
+_q 399
+_qu 399
+que_ 393
+_que 388
+ie 386
+h 385
+pa 382
+y_ 371
+ti 367
+_que_ 365
+_en_ 365
+_y 361
+tr 358
+_el 353
+ri 349
+ia 342
+_el_ 333
+_se 330
+ió 330
+_y_ 330
+io 329
+pr 320
+ón 317
+ec 317
+no 314
+id 301
+í 300
+mi 299
+_t 299
+ión 292
+nte 292
+me 286
+aci 283
+do_ 279
+li 276
+con 276
+nd 273
+est 272
+ni 272
+á 271
+di 270
+_es 268
+_lo 267
+ció 265
+ma 265
+ón_ 264
+_pr 263
+_r 261
+ción 255
+z 254
+ra_ 251
+si 247
+ión_ 246
+oc 245
+nc 244
+_u 244
+_po 243
+los 243
+or_ 242
+_con 241
+is 239
+del 238
+_del 237
+ado 236
+se_ 233
+_i 233
+los_ 231
+_re 231
+por 229
+_del_ 228
+sta 228
+del_ 228
+al_ 228
+ne 226
+_h 226
+cu 225
+_n 225
+_a_ 224
+_v 224
+_un 223
+ce 222
+so 220
+ción_ 218
+res 218
+vi 217
+om 216
+te_ 212
+_pa 211
+ien 210
+j 209
+E 208
+_los 207
+_los_ 207
+to_ 206
+ol 204
+it 203
+am 202
+ació 201
+rt 201
+ación 201
+pe 197
+ha 190
+_se_ 189
+nto 188
+_o 184
+_E 184
+on_ 184
+sa 183
+na_ 182
+ta_ 181
+su 180
+cia 180
+mo 180
+ct 178
+par 178
+_f 177
+_por 176
+eg 172
+_in 172
+ur 170
+L 168
+ve 166
+im 164
+ga 163
+_est 161
+ar_ 161
+ab 160
+_L 159
+tu 158
+at 158
+no_ 157
+s, 157
+s,_ 157
+_por_ 156
+por_ 156
+las 156
+ba 154
+o,_ 154
+o, 154
+ento 151
+et 150
+C 150
+_ha 149
+A 149
+tra 148
+ient 148
+_al 147
+a,_ 146
+ica 146
+a, 146
+pro 146
+ado_ 145
+ici 144
+_ca 144
+an_ 144
+las_ 143
+ara 143
+nci 143
+ente 142
+ú 142
+rr 142
+ir 142
+da_ 141
+em 141
+ll 140
+il 139
+ía 138
+iv 138
+_su 138
+_par 136
+ul 136
+ant 136
+_A 135
+mp 135
+_las_ 134
+_las 134
+_C 134
+_pro 133
+men 132
+P 132
+des 131
+com 130
+ion 130
+era 130
+ed 129
+ida 129
+sp 128
+gu 127
+nte_ 127
+ns 127
+za 126
+dos 125
+M 125
+cio 125
+les 125
+_P 124
+bl 124
+_com 122
+s._ 122
+s. 122
+_M 121
+ua 120
+nta 120
+mu 119
+_no 118
+dad 118
+ñ 117
+é 116
+un_ 116
+va 116
+ist 116
+nes 116
+iento 115
+one 114
+ara_ 113
+S 113
+ada 113
+_un_ 113
+fi 111
+pre 110
+tos 110
+ter 109
+ot 109
+esta 108
+_me 107
+ido 107
+ob 107
+_g 105
+br 105
+go 105
+ea 104
+nto_ 104
+ona 103
+pu 103
+dos_ 103
+tro 103
+ier 103
+para 102
+ment 101
+ag 101
+ero 101
+gr 101
+rec 101
+bi 101
+ia_ 100
+una 100
+nic 99
+ncia 99
+ía_ 98
+a._ 98
+tos_ 98
+a. 98
+ran 98
+lo_ 97
+ones 97
+rm 96
+lu 96
+ron 95
+con_ 95
+ó_ 95
+nes_ 95
+_ci 95
+ante 94
+ch 94
+_con_ 94
+_para 94
+ntr 93
+una_ 93
+para_ 93
+mie 92
+ico 92
+fe 92
+les_ 92
+uc 92
+ip 91
+sto 91
+_ma 91
+ui 91
+sta_ 91
+_ve 90
+cion 90
+" 90
+op 90
+cal 89
+_mu 89
+_S 89
+ro_ 89
+_pe 88
+ste 88
+ras 88
+pl 88
+_una 88
+_di 87
+ento_ 86
+ita 86
+ione 85
+ect 85
+_una_ 85
+mien 85
+tan 85
+du 84
+den 84
+ndo 84
+per 84
+eri 84
diff --git a/libtextcat/data/new_fingerprints/lm/swahili.lm b/libtextcat/data/new_fingerprints/lm/swahili.lm
new file mode 100644
index 000000000000..56090b40153e
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/swahili.lm
@@ -0,0 +1,400 @@
+_ 16483
+a 9342
+i 5293
+a_ 4071
+u 2730
+k 2609
+n 2351
+w 2076
+m 1928
+e 1866
+h 1800
+o 1775
+wa 1743
+l 1486
+s 1419
+i_ 1401
+t 1399
+_k 1306
+y 1100
+_w 969
+li 945
+wa_ 911
+z 891
+_wa 890
+ka 834
+ku 799
+r 770
+b 733
+an 727
+ma 723
+o_ 711
+_m 707
+na 698
+ya 675
+ha 672
+g 602
+al 580
+d 570
+at 560
+am 554
+_n 549
+_ku 549
+ik 543
+_h 515
+ya_ 514
+A 494
+is 487
+_y 485
+hi 474
+na_ 471
+_ya 471
+ta 468
+sh 456
+ali 449
+j 426
+u_ 423
+ki 418
+e_ 402
+p 401
+ti 401
+_wa_ 399
+f 392
+_ya_ 390
+ba 390
+ri 385
+ng 385
+il 380
+c 358
+hu 356
+_na 356
+ni 355
+za 354
+zi 351
+ia 344
+_na_ 343
+_a 336
+in 327
+_ma 326
+ch 322
+mb 317
+ika 314
+. 311
+._ 309
+_ka 307
+as 306
+ak 306
+ati 301
+, 300
+ka_ 296
+,_ 294
+_u 292
+kw 286
+ili 278
+K 274
+en 271
+si 266
+_kw 262
+la 261
+ni_ 261
+ma_ 261
+_s 258
+kwa 258
+ar 256
+ut 245
+za_ 245
+nd 242
+mba 241
+_kwa 239
+_z 234
+li_ 233
+un 233
+ny 230
+it 229
+se 229
+yo 227
+ia_ 222
+M 221
+sa 221
+kat 217
+_K 214
+_i 213
+ika_ 213
+ana 212
+ish 212
+kati 206
+_ha 204
+on 201
+ai 200
+I 198
+aa 196
+um 195
+im 190
+v 188
+mu 187
+amb 187
+sha 185
+em 183
+fa 181
+zi_ 180
+di 179
+mi 178
+_M 178
+us 176
+_ki 176
+ha_ 175
+iw 172
+ama 172
+_kat 168
+_kati 168
+_hi 166
+_l 166
+ra 166
+kwa_ 165
+la_ 164
+W 164
+ja 163
+U 163
+N 163
+amba 161
+ao 161
+_za 160
+ji 160
+B 157
+iwa 155
+tik 155
+wal 155
+le 155
+tika 154
+ge 153
+lis 153
+tu 152
+atika 152
+to 152
+atik 152
+uw 152
+_kwa_ 151
+A_ 151
+ke 150
+S 147
+tika_ 145
+aj 145
+we 144
+cha 144
+bi 141
+az 140
+er 139
+ek 138
+katik 138
+ez 138
+uwa 137
+kut 135
+_al 134
+_B 134
+ad 134
+mu_ 133
+_ali 133
+rik 132
+_W 131
+ba_ 131
+kuw 131
+me 130
+ali_ 128
+kuwa 128
+ema 127
+wan 127
+bu 126
+sem 126
+_A 125
+ir 125
+ata 125
+iz 124
+_hu 124
+ay 124
+ul 124
+af 123
+iki 122
+ema_ 121
+da 120
+ti_ 120
+sema 119
+aka 118
+sema_ 118
+te 118
+uz 117
+yo_ 117
+_v 117
+io 116
+iy 115
+uta 115
+ani 115
+_wal 115
+he 115
+if 114
+_la 114
+ab 114
+go 112
+_za_ 111
+ama_ 111
+sa_ 111
+pa 110
+_t 110
+zo 110
+nge 110
+wam 109
+wali 108
+ua 107
+ur 106
+_c 106
+ise 105
+_ch 105
+isem 105
+ho 105
+ye 104
+iyo 104
+E 104
+el 104
+mo 103
+ung 103
+eri 103
+_wali 103
+_b 102
+mba_ 102
+ari 101
+ita 101
+isema 100
+ot 99
+_la_ 99
+uk 99
+ao_ 99
+di_ 99
+sha_ 99
+ini 99
+kuwa_ 98
+uwa_ 98
+ana_ 98
+lise 98
+lisem 98
+uli 97
+shi 97
+ga 96
+iwa_ 96
+fu 96
+T 96
+R 95
+_il 95
+wak 94
+aw 94
+isha 94
+ri_ 93
+_am 93
+ara 92
+_cha 92
+aji 92
+_ili 91
+ifa 91
+O 90
+_p 90
+uh 90
+iri 90
+chi 90
+asi 89
+po 89
+a. 89
+ong 89
+azi 88
+_j 88
+_kut 88
+eny 88
+nc 88
+a._ 88
+ko 87
+uu 87
+id 87
+w_ 87
+no 87
+P 86
+ah 86
+ina 86
+rika 86
+_Bw 85
+H 85
+gu 85
+uo 85
+Bw_ 85
+_Bw_ 85
+_se 85
+Bw 85
+ib 84
+_S 84
+kam 84
+hi_ 84
+nya 84
+si_ 83
+a, 82
+no_ 81
+pi 81
+ok 81
+i. 81
+ip 81
+kwam 81
+i._ 81
+amba_ 80
+dh 80
+end 80
+ani_ 80
+a,_ 79
+wamb 79
+kwamb 79
+_sh 79
+eza 79
+nz 79
+wi 79
+_kwam 79
+wamba 79
+alis 78
+_kuw 78
+ngo 78
+ap 77
+_N 77
+any 77
+ili_ 77
+C 77
+WA 76
+vy 76
+wana 76
+_hiy 75
+Wa 75
+hiyo 75
+nch 75
+_hiyo 75
+de 75
+_kuwa 75
+ing 75
+hiy 75
+vi 75
+isha_ 74
+es 74
+atu 74
+_Wa 74
+nchi 74
+aki 74
+lim 73
+da_ 73
+ini_ 73
+ash 73
+ala 73
+i, 73
+ano 73
+i,_ 72
+_kam 71
+_wan 71
+ano_ 71
+mw 71
+nde 71
+ji_ 71
+ion 70
+_amb 70
+ndi 70
+_Ka 70
+eza_ 70
diff --git a/libtextcat/data/new_fingerprints/lm/swedish.lm b/libtextcat/data/new_fingerprints/lm/swedish.lm
new file mode 100644
index 000000000000..1c021242b9fe
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/swedish.lm
@@ -0,0 +1,400 @@
+_ 33494
+e 8992
+n 7900
+t 7859
+a 7781
+r 7251
+s 6435
+i 5649
+l 4541
+d 4079
+o 3724
+m 3203
+k 3058
+g 2478
+en 2403
+n_ 2389
+t_ 2073
+de 1939
+r_ 1910
+v 1890
+h 1789
+u 1782
+_s 1768
+ä 1724
+er 1709
+f 1597
+en_ 1537
+a_ 1526
+an 1357
+p 1320
+et 1317
+ö 1278
+Ã¥ 1261
+st 1236
+ar 1226
+c 1191
+_d 1158
+e_ 1116
+in 1045
+_f 1027
+te 1000
+b 997
+_a 978
+s_ 974
+ra 958
+. 956
+tt 935
+_i 898
+_m 890
+._ 886
+ll 870
+ta 844
+_o 842
+_e 839
+nd 820
+ti 804
+sk 798
+re 779
+at 769
+_de 754
+om 743
+m_ 739
+ör 720
+, 697
+,_ 695
+ng 686
+li 673
+ka 666
+oc 662
+_h 654
+on 652
+et_ 647
+ch 645
+ns 643
+is 642
+er_ 630
+är 625
+_v 614
+_t 614
+ni 611
+i_ 609
+_oc 592
+tt_ 587
+na 586
+y 586
+la 579
+_b 579
+h_ 577
+kt 575
+ch_ 568
+ig 564
+fö 563
+och 555
+or 555
+_och 554
+och_ 554
+_och_ 553
+me 548
+den 548
+om_ 535
+_i_ 531
+d_ 530
+j 529
+ik 520
+de_ 520
+för 518
+ge 498
+ad 497
+_k 491
+_fö 487
+ri 484
+el 482
+il 481
+so 480
+al 474
+g_ 469
+le 464
+an_ 461
+_för 447
+si 437
+ar_ 437
+att 435
+_p 434
+es 420
+ing 413
+se 407
+to 404
+_u 403
+_en 403
+and 398
+den_ 395
+nde 393
+nn 393
+_l 391
+Ã¥_ 391
+D 385
+än 383
+nt 382
+l_ 381
+tr 378
+_D 372
+va 370
+am 369
+sa 367
+_so 365
+ga 364
+_en_ 361
+är_ 358
+ck 357
+av 354
+v_ 351
+ed 347
+ma 346
+da 346
+som 346
+rs 344
+som_ 344
+ve 342
+ter 341
+att_ 341
+ha 338
+ne 337
+ut 335
+as 332
+ska 329
+_at 327
+_att 326
+_som 324
+_att_ 324
+_som_ 323
+vi 322
+ikt 317
+_av 316
+det 316
+_den 315
+he 315
+ss 314
+un 307
+ke 304
+_g 303
+us 302
+di 302
+_st 300
+rn 297
+_me 296
+_ä 295
+ade 294
+" 290
+_ha 290
+av_ 289
+ill 288
+_n 286
+_in 279
+io 275
+_r 275
+der 275
+it 274
+_av_ 274
+sta 274
+gen 272
+isk 270
+_ti 269
+id 265
+na_ 265
+ns_ 264
+ko 262
+_den_ 261
+ag 258
+det_ 257
+lig 257
+era 256
+ll_ 255
+_det 252
+_är 251
+be 249
+_är_ 248
+ra_ 247
+ion 244
+- 241
+pr 240
+oni 233
+til 231
+ten 228
+_si 225
+k_ 222
+på 222
+fr 221
+ro 219
+till 219
+iv 216
+ls 216
+ande 215
+ör_ 214
+_det_ 213
+äl 212
+_på 211
+ts 210
+ens 209
+med 209
+mm 208
+rt 208
+_till 208
+_til 208
+_va 207
+_fr 205
+_sk 205
+var 205
+nin 204
+ning 203
+ol 201
+ka_ 200
+lle 198
+ett 198
+rd 197
+em 196
+på_ 195
+x 195
+rk 194
+_ut 194
+ste 194
+ds 193
+_vi 192
+Ã¥r 192
+S 192
+nde_ 191
+are 191
+ver 190
+_på_ 190
+nis 189
+kr 189
+_med 188
+all 188
+Ã¥n 187
+nge 185
+mo 184
+os 183
+ld 182
+ade_ 181
+_S 181
+ed_ 180
+rä 176
+De 175
+_- 175
+kan 174
+ta_ 173
+ng_ 172
+vä 171
+för_ 170
+ill_ 170
+han 170
+_De 170
+pp 169
+lt 169
+sam 168
+nte 167
+ans 167
+ton 166
+ur 165
+mi 165
+ess 165
+kl 164
+ig_ 164
+ks 164
+as_ 163
+und 163
+men 162
+med_ 161
+_med_ 161
+ak 161
+Di 160
+ot 159
+rna 159
+ul 159
+_var 159
+te_ 158
+gen_ 158
+het 157
+kto 157
+str 156
+_Di 155
+tad 155
+lan 154
+ga_ 154
+iska 154
+fa 154
+fi 154
+så 154
+Dikt 153
+Dik 153
+pe 153
+ska_ 152
+ja 152
+H 151
+res 151
+ku 151
+iu 150
+ande_ 150
+till_ 150
+t. 150
+ern 150
+rm 149
+_Dikt 149
+_Dik 149
+ie 149
+bl 148
+-_ 147
+od 147
+_H 147
+n. 147
+ist 147
+_di 146
+ius 146
+_" 145
+la_ 145
+sl 145
+man 145
+ren 145
+_för_ 145
+toni 144
+kton 144
+n._ 144
+ktoni 144
+ikton 144
+I 144
+ikto 144
+nius 143
+ten_ 143
+onius 143
+oniu 143
+toniu 143
+ing_ 143
+Dikto 143
+niu 143
+_ko 143
+ic 142
+_sa 142
+_han 142
+ett_ 142
+sm 141
+ba 141
+M 141
+gr 140
+lä 140
+ex 138
+t._ 138
+sp 137
+lla 137
+_et 137
+_M 137
+dr 137
+rö 136
+rad 136
+ek 136
+_be 135
+tar 135
+_-_ 135
+_om 134
+rl 134
+E 134
+mä 133
diff --git a/libtextcat/data/new_fingerprints/lm/tagalog.lm b/libtextcat/data/new_fingerprints/lm/tagalog.lm
new file mode 100644
index 000000000000..bc87d38d0c92
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/tagalog.lm
@@ -0,0 +1,400 @@
+_ 10664
+a 6092
+n 3421
+i 2332
+g 2149
+ng 1488
+an 1332
+g_ 1308
+ng_ 1243
+t 1155
+a_ 1138
+o 1137
+l 1105
+s 1067
+k 1035
+_n 957
+m 909
+y 801
+ang 787
+u 767
+ang_ 683
+p 682
+na 670
+_a 634
+la 596
+. 572
+in 561
+r 559
+sa 545
+_s 502
+._ 487
+ma 476
+ka 473
+_na 464
+b 462
+_m 458
+al 448
+d 445
+pa 414
+n_ 412
+at 394
+ak 393
+h 378
+ya 358
+_k 356
+ag 350
+ni 344
+_p 339
+_sa 335
+o_ 328
+ta 308
+_an 307
+iy 300
+sa_ 296
+_ang 294
+_ang_ 293
+t_ 290
+ay 286
+ga 283
+on 280
+it 275
+_sa_ 265
+ala 258
+_ng 256
+am 252
+_i 248
+_pa 248
+i_ 243
+na_ 243
+an_ 240
+e 239
+_ma 237
+_ka 235
+iya 231
+y_ 229
+il 228
+li 228
+w 226
+_ni 225
+_na_ 224
+_ng_ 220
+as 214
+ba 207
+si 206
+" 195
+ti 195
+ha 186
+, 179
+ar 178
+ing 173
+ra 173
+A 172
+ki 168
+ong 167
+_b 167
+ap 166
+,_ 162
+ong_ 161
+ko 159
+ay_ 159
+un 157
+ul 155
+yo 154
+to 152
+_l 150
+ah 148
+is 147
+hi 147
+_t 146
+lan 145
+ama 142
+niy 139
+at_ 138
+_niy 138
+aka 136
+wa 136
+niya 135
+_niya 135
+ab 134
+- 134
+di 133
+_si 132
+"_ 131
+aw 129
+_d 126
+_A 126
+yan 123
+ya_ 122
+ata 120
+a. 120
+gi 120
+P 120
+ing_ 118
+um 115
+o. 113
+aki 113
+ri 113
+ik 112
+nd 112
+ila 111
+mo 110
+da 110
+a._ 110
+in_ 109
+la_ 107
+ali 106
+S 106
+man 105
+ig 105
+iya_ 105
+s_ 104
+_ak 104
+_at 103
+_h 102
+yon 102
+asa 101
+ina 101
+_P 101
+n. 99
+N 98
+aa 98
+ga_ 97
+_mo 97
+_ba 97
+_" 95
+ito 94
+bi 94
+yang 94
+n._ 93
+pag 92
+lang 92
+yang_ 92
+_la 92
+o._ 90
+K 90
+_at_ 90
+tu 88
+_S 88
+ara 87
+nga 87
+ro 85
+apa 83
+rr 82
+lam 82
+lo 81
+nan 81
+_N 80
+aman 79
+aha 78
+mg 78
+mga 78
+mga_ 78
+_mga_ 78
+_mg 78
+_mga 78
+_K 78
+siy 77
+kan 76
+it_ 76
+san 76
+d_ 75
+ad 75
+di_ 74
+tin 74
+' 74
+ati 73
+siya 73
+kin 72
+M 72
+lang_ 71
+mo_ 70
+_mo_ 70
+ako 70
+uma 70
+_pag 69
+pi 69
+l_ 68
+_siy 68
+rrr 68
+_siya 67
+ula 67
+_M 66
+Pa 66
+iyo 66
+mi 66
+bu 66
+mu 65
+no 65
+pu 65
+nag 65
+ung 65
+Na 65
+ot 64
+_Na 64
+niya_ 64
+iyan 64
+ku 64
+k_ 63
+go 62
+awa 62
+ip 61
+_Pa 61
+lu 61
+_di 60
+pan 60
+_ta 60
+ini 60
+isa 60
+nt 60
+iyang 60
+_iyo 59
+_iy 59
+on_ 59
+tan 59
+mang 59
+aba 59
+gan 59
+ut 58
+I 58
+hin 58
+nak 58
+an. 57
+akin 57
+_r 57
+han 57
+Ka 57
+_ay 57
+_ako 56
+may 56
+iyon 56
+rrrr 55
+Sa 55
+aga 55
+to_ 55
+nit 55
+_ko 54
+er 54
+ib 54
+ari 54
+ana 54
+ili 54
+an._ 54
+ahi 54
+au 54
+ala_ 54
+gk 53
+pa_ 53
+_is 53
+rin 53
+ilan 52
+_kan 52
+_Ka 52
+_it 52
+_Sa 51
+king 51
+ko_ 51
+_nak 51
+gin 51
+_ay_ 50
+bo 50
+_iyon 50
+amang 50
+'y 49
+os 49
+mang_ 49
+_pa_ 49
+kat 49
+a, 49
+An 48
+Ma 48
+ny 48
+mag 48
+_ku 48
+_ito 48
+_ha 47
+yong 47
+? 47
+aking 47
+T 47
+ni_ 47
+yong_ 47
+_An 47
+king_ 47
+_akin 46
+sang 46
+_nag 46
+kas 46
+_aki 46
+_ni_ 46
+ayo 45
+kit 45
+'y_ 45
+mat 45
+_Ma 45
+lal 45
+ot_ 45
+nya 44
+ban 44
+ndi 44
+oo 44
+_u 44
+ngi 44
+_hi 44
+sang_ 44
+B 43
+su 43
+may_ 43
+rrrrr 43
+p_ 43
+ita 43
+wal 43
+ika 43
+abi 43
+aan 43
+_may 43
+lama 42
+naka 42
+mal 42
+_I 42
+_ri 42
+alan 42
+any 42
+im 42
+_pu 42
+ai 42
+wala 41
+anya 41
+a,_ 41
+_B 41
+ndi_ 41
+as_ 41
+pat 41
+po 41
+nang 41
+_mag 41
+laman 40
+lala 40
+kal 40
+g- 40
+.. 40
+ir 40
+! 40
+uk 40
+gu 39
+ito_ 39
+ro_ 39
+_g 39
+_da 39
+_isa 39
+_lam 39
+ilang 39
+kanya 39
+w_ 39
+kany 39
+agk 38
+pal 38
+ka_ 38
+_naka 38
+siya_ 38
+isan 38
diff --git a/libtextcat/data/new_fingerprints/lm/tamil.lm b/libtextcat/data/new_fingerprints/lm/tamil.lm
new file mode 100644
index 000000000000..8563707d5e9d
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/tamil.lm
@@ -0,0 +1,400 @@
+_ 11468
+Õ 2697
+Ô 2533
+´ 1960
+» 1786
+½ 1249
+Â 1103
+Ù 990
+£ 976
+Ø 849
+¡ 847
+¨ 831
+¿ 821
+À 794
+¾ 758
+§ 750
+Ã 721
+¹ 715
+¶ 713
+. 702
+£_ 671
+× 655
+â 648
+Æ 625
+._ 611
+ç 601
+Ã… 571
+¯ 557
+»Õ 556
+É 524
+_É 520
+¢ 518
+_½ 492
+Ç 489
+_× 488
+Þ 484
+_Ç 482
+¤ 461
+Ä 442
+¼ 418
+ÿ 416
+Â¥ 403
+§_ 398
+½Ô 373
+_Ø 368
+ª 360
+Ã 359
+_Ã 349
+»Ô 348
+´Õ 342
+à 337
+_Â 331
+õ 331
+¨_ 323
+_¼ 321
+¡´ 320
+_´ 310
+Õ¿ 308
+Ü 299
+¢» 294
+Ó 292
+´Ô 292
+ÿ» 290
+Õ§ 288
+¡Þ 284
+â_ 279
+ý 271
+õ´ 266
+ÂÕ 265
+_» 265
+¾Ô 241
+_¾ 234
+´ª 232
+= 231
+± 229
+¤½ 227
+== 221
+¶Õ 218
+»_ 218
+Õ§_ 213
+=== 211
+Õ_ 210
+==== 201
+ª_ 199
+´_ 198
+ÀÕ 197
+===== 191
+, 188
+ؽ 188
+¿Õ 187
+º 186
+_¶ 184
+,_ 184
+Ã 183
+Ô¯ 181
+¿_ 179
+Ô¨ 178
+Ã¥ 172
+´ª_ 171
+ì 169
+Ùà 168
+¢»Õ 165
+¿Ô 165
+Ô_ 165
+ç¡ 164
+È 161
+Þ_ 160
+è 160
+_è 157
+« 156
+_È 156
+§à 152
+Ô´ 152
+Ù» 151
+Ã 150
+_Ã 149
+ä 149
+ö 146
+ÕÅ 146
+Ø½Ô 145
+¯´ 143
+Ö 142
+׶ 142
+ÃÔ 142
+½Õ 140
+Ã_ 139
+Ô§ 138
+_Éç 138
+Éç 138
+¶Ô 137
+Õ¨ 137
+Æ_ 136
+¢â 133
+_׶ 132
+ÔÆ 128
+¡´Õ 128
+ÀÔ 127
+¥¹ 127
+_Ë 127
+Ë 127
+_ؽ 127
+ÕÀ 125
+öÓ 125
+ÄÕ 124
+ÆÔ 122
+ÅÕ 121
+Þ£ 120
+Õç 119
+¼Ô 119
+×½ 119
+´Ù 119
+_Ã¥ 118
+¯_ 118
+£. 117
+´ÕÅ 116
+»Õ¿ 115
+ÂÔ 115
+_ÂÕ 113
+â. 113
+×» 113
+£._ 112
+¡Þ_ 112
+ÕÙ 111
+Õ¡ 111
+ÙÄ 109
+×´ 109
+é 109
+â._ 108
+_½Ô 107
+Õ´ 106
+¤_ 105
+ÆÕ 104
+' 104
+½¹ 103
+ç_ 102
+¹Ô 102
+Ø´ 101
+¼Õ 100
+Éÿ 100
+_Éÿ 99
+_×» 99
+ÔÀ 98
+Éÿ» 98
+æ 98
+_×½ 97
+Ã…_ 97
+Ô» 97
+_Éÿ» 97
+Ô¨_ 97
+¹_ 97
+ß 96
+±Å 96
+×´Ô 96
+»Ô¨ 95
+¡Þ£ 94
+Õ¿_ 94
+Ø» 94
+ø 94
+ØÂ 93
+í 93
+_Ø½Ô 93
+¾Õ 93
+Õ¨_ 92
+ÿ»_ 91
+ÃÕ 91
+_Éç¡ 90
+Éç¡ 90
+Ãç 90
+ÕÆ 90
+_Ãç 90
+ÿ»Õ 90
+Ù¹ 89
+_´Ô 89
+_Ãç_ 88
+_¼Ô 88
+Ãç_ 88
+ì_ 86
+ç¡´ 86
+ÅÔ 86
+ÿâ 85
+¥à 85
+¯´ª 84
+¨Æ 84
+¨ì 83
+Ô¥ 83
+÷ 83
+_Þ 83
+´Ä 82
+à_ 81
+Ô´_ 81
+¨. 80
+_¾Ô 80
+¿Õ§ 80
+² 80
+Õà 80
+Ä_ 79
+´ÙÄ 79
+_½¹ 79
+¨Å 79
+Ô£ 79
+Ǧ 78
+¡_ 78
+¨._ 77
+_¶Õ 77
+§ÃÔ 77
+_¼Õ 77
+ë 77
+Åâ 76
+Þ£_ 75
+ÔÆ_ 75
+Ù¾ 75
+¯ 75
+ÙÂ 74
+çÿ 73
+ýà 72
+è 72
+¨ì_ 71
+Ô§_ 71
+´ë 71
+¥Ü 71
+§Ù 70
+»Õ§ 70
+§Ùà 70
+£½ 69
+Ù»_ 69
+ªÄ 69
+ç¡Þ 69
+Ó_ 68
+Ôõ 68
+ؾ 67
+_Ø´ 67
+Õ¢ 66
+ÄÔ 66
+»Ô¨_ 66
+࣠66
+_Ç» 66
+Ç» 66
+Ô¹ 66
+ÃŽ 66
+¿Õ§_ 65
+Ôâ 65
+_»Õ 64
+¯. 64
+¹¢ 63
+Õ¥ 63
+Ô¡ 63
+_×´ 63
+_ÃŽ 63
+Ù´ 62
+´ÄÕ 62
+Õ¿Ô 62
+¯._ 61
+Éÿ»Õ 61
+_½Õ 61
+_×»Ô 61
+×»Ô 61
+Ôõ´ 61
+½¥ 60
+ÿ»Õ¿ 60
+_ä 60
+_Éÿ»Õ 60
+Ô¾ 60
+Ôç 59
+×¶Ô 59
+¡Þ£_ 59
+¤ä 59
+_ÇÙ 58
+ÿâ_ 58
+ÙÅ 58
+ÇÂ 58
+ÇÙ 58
+Éÿ»Õ¿ 58
+ºÕ 58
+»À 57
+½Ô¯ 57
+¹£ 57
+ýº 57
+_ÇÂ 57
+æ£ 56
+Ôà 56
+_×¶Ô 55
+? 55
+ý¹ 54
+ÃÕ 54
+ÙÆ 54
+»Õç 54
+_×´Ô 54
+´ÕÅâ 53
+»Õ¿_ 53
+_À 53
+ÕÅâ 53
+¡´ÕÅ 53
+ç¡´Õ 52
+ÇÀ 52
+ÙÄ_ 52
+× 51
+Ü_ 51
+'_ 51
+_ÇÀ 51
+ÔÂ 51
+èì 51
+Âç 51
+î 51
+â¡ 51
+_èì 51
+À_ 51
+Õ¿Õ 51
+ÙÀ 51
+_ɧ 50
+ɧ 50
+½ý 50
+»Õ§_ 50
+×½Ô 50
+±ÅÕ 50
+»¾ 49
+à£_ 49
+Ôà 49
+õ´ª 49
+´¥ 49
+»Õ_ 49
+»ÕÀ 48
+¶Ô§ 48
+ç£ 48
+Õ¡´ 48
+Õ¤ 48
+ÕØ 48
+À£ 48
+ÀÕ¿ 48
+_Éç¡´ 47
+æ£_ 47
+ê 47
+èì_ 47
+׶ԧ 47
+Éç¡´ 47
+Â_ 47
+_èì_ 47
+?_ 46
+Ø¿ 46
+Ô¿ 46
+_Ø» 46
+¹õ 46
+_Ù 46
+õ´ª_ 46
+è£ 45
+´ÙÄ_ 45
+¢Ù» 45
+¢Ù 45
+»Ù 45
+_×½Ô 44
+½Ü 44
+Ô£_ 44
+ÕÆÕ 44
+´Ø 44
+á 44
+´£ 44
+½Ôâ 44
+ÃÔ£ 44
+Ã. 44
+_׶ԧ 43
+´ë¡ 43
diff --git a/libtextcat/data/new_fingerprints/lm/thai.lm b/libtextcat/data/new_fingerprints/lm/thai.lm
new file mode 100644
index 000000000000..e4b65ecdad56
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/thai.lm
@@ -0,0 +1,400 @@
+_ 6290
+Ã’ 5252
+Ã 4377
+¹ 3920
+¡ 3050
+è 2984
+Ã 2657
+§ 2522
+Ñ 2454
+Ã 2369
+é 2304
+à 2158
+Â 1981
+Ç 1908
+Õ 1693
+Ã… 1543
+Ô 1443
+´ 1422
+· 1398
+µ 1398
+» 1301
+Ë 1245
+Ã 1239
+¤ 1210
+º 1181
+Ê 1081
+¨ 1045
+ä 978
+ÒÃ 951
+¾ 907
+ª 892
+èÒ 829
+á 795
+¡Ò 735
+ã 722
+¡ÒÃ 710
+¢ 691
+ÒÂ 688
+× 668
+ç 595
+. 588
+ç 553
+Ø 543
+ÃÑ 532
+Õè 528
+Ãà 522
+»Ã 522
+·Õ 477
+Ù 452
+·Õè 451
+èà 443
+¹Ò 441
+Ó 422
+Ò§ 419
+éÒ 414
+ì 388
+¹_ 378
+ÃÃ 367
+Ã’Ã 360
+° 354
+Ѻ 349
+Ã’_ 347
+éà 345
+»Ãà 340
+Çè 337
+ÃÒ 337
+Ãè 336
+ÇèÒ 336
+à» 334
+Ò¹ 333
+Ñé 332
+¡Ã 328
+´é 325
+Ö 322
+_à 320
+ç¹ 314
+À 313
+ÃÃ’ 299
+ѧ 297
+Ò¡ 297
+×à 296
+äà 295
+Ñ° 293
+ÃÑ° 293
+×è 290
+â 289
+äÃè 288
+¸ 285
+ã¹ 280
+¼ 273
+è§ 268
+¹ÒÂ 268
+é¹ 266
+¾Ã 263
+ѹ 263
+³ 263
+áÅ 256
+¹¡ 254
+ËÒ 253
+§¡ 252
+×èà 251
+ÃÕ 251
+ä´ 248
+­ 248
+ä´é 248
+¨à 248
+»ç¹ 247
+à»ç¹ 247
+ȍ 247
+à»ç 247
+á 245
+ÇÒ 245
+ãË 244
+¶ 244
+ªÒ 243
+µÔ 241
+_á 240
+¡Ñ 238
+È 237
+_¡ 236
+µè 234
+« 227
+éÇ 225
+é§ 221
+É 216
+¡ 214
+ä 213
+ÕÂ 211
+Ñé§ 211
+Ãà 211
+¢à 210
+Ô¹ 209
+èÇ 206
+Õé 202
+Ã…Ã 202
+˹ 201
+¹Ñ 201
+_¹ 200
+µÃ 197
+Ëé 196
+§_ 193
+ù 191
+Ãà 191
+ÀÒ 188
+ÃÂ 188
+ÒÇ 186
+¢Ã§ 184
+¹Õ 184
+ãËé 183
+¤Ç 183
+ÃÕ 181
+§à 180
+¡Å 179
+áÅà 178
+èÒ§ 177
+èÒ_ 176
+Ãä 176
+_¾ 176
+¤ÇÒà 174
+¹µ 174
+ÇÒà 174
+¤ÇÒ 174
+Ñ´ 172
+Ô´ 172
+ÇèÒ_ 171
+¾Ãà 167
+¨Ò 167
+ù 167
+Òµ 167
+¹Õé 167
+¾Ãä 166
+¡ç 166
+¤Ã 165
+¹à 165
+è¹ 163
+ºÒ 162
+¢é 161
+§ã 161
+Õ_ 161
+ì_ 154
+ËÅ 154
+Ã× 154
+éç 151
+¹¡Òà 151
+¹¡Ò 151
+ä» 150
+Ñ¡ 150
+é_ 148
+ÃÒÂ 147
+µÑ 146
+¹· 146
+ÅÑ 146
+Â_ 146
+ྠ145
+Åé 140
+à¡ 139
+¨Ò¡ 139
+àà 138
+ÃÔ 138
+¾Å 137
+Ã× 136
+·Ñ 135
+¡Ñº 134
+Ò¡ 133
+_Ã 132
+ºÃ 132
+§ä 132
+Ãà 131
+à· 130
+Åè 129
+ÒµÔ 129
+_· 128
+¡Ô 128
+µÃÕ 128
+ÃѺ 128
+Õ¡ 128
+àË 127
+¹à 127
+µé 126
+_¹Ò 126
+ªÒµÔ 126
+Ã…Ã’ 126
+ªÒµ 126
+¹Ç 126
+Òà 126
+_áÅ 125
+§¹ 124
+§¤ 124
+¡ÒÃà 124
+ÒÃà 124
+ÃÃ 123
+Ò¤ 122
+  122
+áµ 122
+àÊ 121
+ÇÑ 121
+Ñé¹ 120
+ÃÃà 120
+Ò· 119
+¹µÃ 119
+èÒÇ 118
+áµè 118
+§· 117
+ǹ 117
+ÂÑ 117
+ùµÃ 117
+¹µÃÕ 117
+ùµ 117
+ùµÃÕ 117
+Ãà 116
+Ùé 116
+_¹ÒÂ 116
+àÃ× 116
+°Ã¹ 115
+àà 115
+ÃѰù 115
+Ëà 115
+ࢠ115
+Ñ°à 115
+Ã. 115
+Ѱùµ 115
+ÃÑ°à 115
+°Ã¹µ 115
+Ѱù 115
+°Ã¹µÃ 115
+°à 115
+_áÅà 113
+éà 113
+Ãè 113
+èà 112
+㨠112
+_Ê 112
+¹Ñé 111
+Ô» 111
+¹Ñé¹ 111
+èà 111
+èä 110
+_Ã 110
+Âà 110
+_¨ 110
+Ò¨ 109
+»ÃÒ 108
+¹Ò¡ 108
+_Ë 107
+Ñ­ 107
+éÒ¹ 107
+¨Ñ 106
+§¡Ò 106
+_¤ 106
+§¡Òà 105
+ÇÂ 105
+Ôµ 105
+¹é 105
+Ã’Ã… 103
+´Â 102
+è_ 102
+ÃÃà 102
+àà 102
+¹¹ 102
+ÃÃ’ 101
+â´ 100
+悅 100
+ªè 100
+_ä 99
+ÇÅ 99
+悇 99
+â´Â 99
+Ã_ 98
+ê 98
+¾Ñ 98
+»ÃÃà 98
+¡à 97
+Öè§ 97
+¡_ 97
+Öè 97
+Òª 97
+é¹_ 96
+ÅÔ 96
+Ñ°º 94
+Ò¾ 94
+ÃÑ°º 94
+¼Ù 94
+°º 94
+Âè 93
+¹ä 93
+·Ò 93
+°ºÒ 92
+ÅèÒ 92
+Ã…. 92
+Ñ°ºÒ 92
+¡Ñ¹ 92
+Ñ°ºÒÅ 92
+§Ã 92
+ÃÑ°ºÒ 92
+ºÒÅ 92
+°ºÒÅ 92
+Ö§ 92
+.Ã 91
+¢éÒ 91
+á 91
+_¾Å 90
+ÃÂè 90
+¾Å. 90
+Ãà 90
+ÃÂèÒ 90
+ÂèÒ 90
+¡Ãà 89
+.Ã. 89
+ÂèÒ§ 89
+Õé_ 89
+ÃÂèÒ§ 89
+儤 89
+ÊØ 89
+抅 88
+ú 88
+_â 88
+Ã….Ã 87
+èç 87
+Ãê 87
+Ãà 87
+ÂÇ 87
+Åѧ 87
+¾Å.Ã. 87
+Ã….Ã. 87
+¹Õé_ 87
+à´ 87
+á 87
+¼Ùé 87
+ÇÔ 87
+¾Å.à 87
+»Ãê 87
+Ȅ 87
+' 86
+ÊÔ 86
+Êè 86
+Ãèä 86
+_¾Å. 86
+·Ò§ 86
+Çà 85
+¡à 85
+§ 85
+._ 85
+¡ÃÑ 85
+ÃÀ 85
+ªÇ 84
+Ã’Ã 84
+¹éÒ 84
+¡ÃÑ° 84
+ÀÔ 84
+·Ó 84
+ËÒÃ 84
+_¾Å.à 84
+ÃÃ 83
+§Ê 83
+ÃÀÔ 83
+äÃèä 83
+ѵ 82
+»ÃÒ 82
+¤¹ 82
+Ã_ 82
diff --git a/libtextcat/data/new_fingerprints/lm/turkish.lm b/libtextcat/data/new_fingerprints/lm/turkish.lm
new file mode 100644
index 000000000000..553be45fd735
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/turkish.lm
@@ -0,0 +1,400 @@
+_ 23226
+i 8957
+a 7675
+e 6219
+n 5169
+& 4950
+; 4950
+l 4674
+r 4464
+&i 3206
+&i; 3206
+i; 3206
+s 3206
+k 3009
+d 2977
+t 2442
+m 2010
+y 2001
+u 1885
+n_ 1725
+g 1584
+o 1567
+b 1470
+e_ 1426
+ü 1353
+ar 1273
+la 1236
+a_ 1209
+i_ 1169
+in 1124
+_b 1101
+an 1097
+er 1073
+le 1058
+s; 1036
+&s; 1036
+&s 1036
+;_ 1018
+de 936
+;n 918
+&i;n 917
+i;n 917
+,_ 872
+_, 872
+_,_ 872
+, 872
+i;_ 863
+&i;_ 863
+_k 816
+en 769
+r_ 762
+_y 759
+da 759
+il 759
+k_ 750
+z 739
+nd 725
+&g; 708
+g; 708
+&g 708
+ra 697
+_a 692
+_d 692
+_s 676
+' 673
+_i 644
+._ 641
+. 641
+_. 637
+_._ 637
+c 637
+ka 635
+v 632
+;& 624
+h 615
+_g 597
+ri 596
+ç 596
+lar 583
+li 580
+ma 559
+ya 555
+ler 553
+p 547
+re 529
+al 529
+ö 527
+_t 520
+ir 508
+ak 502
+bi 500
+;l 480
+in_ 478
+di 477
+r& 468
+el 453
+et 449
+ek 445
+n& 439
+_o 439
+ol 437
+da_ 434
+n&i 433
+n&i; 433
+ni 429
+ti 428
+de_ 425
+an_ 422
+eri 421
+r&i 417
+r&i; 417
+s& 413
+s&i 413
+s&i; 413
+ar& 412
+me 407
+te 405
+a& 404
+i& 390
+ay 387
+ne 380
+_bi 373
+_ka 368
+ar&i; 367
+ar&i 367
+u_ 363
+as 363
+_e 362
+ta 359
+&i;l 352
+i;l 352
+nda 350
+ki 347
+na 346
+si 343
+_v 337
+;&i; 334
+;&i 334
+ve 334
+ara 333
+en_ 332
+;i 331
+on 328
+un 326
+l&i; 322
+l& 322
+l&i 322
+leri 322
+ba 318
+_m 318
+ik 315
+mi 315
+f 306
+lar& 302
+lar&i 302
+sa 298
+_h 297
+ld 296
+&i;& 290
+i;& 290
+_ve 288
+l_ 287
+ge 286
+is 285
+ed 285
+i&s; 284
+i&s 284
+;r 282
+_ya 279
+_ol 279
+d&i; 278
+d& 278
+d&i 278
+nl 277
+kl 275
+;k 274
+&i;n_ 271
+;n_ 271
+i;n_ 271
+ile 270
+or 269
+iy 267
+a&s 264
+a&s; 264
+y&i; 262
+ad 262
+y& 262
+y&i 262
+ye 259
+ha 258
+es 258
+t& 257
+t&i 257
+t&i; 257
+ini 253
+;nd 253
+i;nd 253
+ür 253
+&i;nd 253
+se 248
+_ge 248
+i;nda 248
+;nda 248
+;n&i; 247
+i;n& 247
+;n& 247
+&i;n& 247
+i;n&i 247
+;n&i 247
+bu 245
+_' 245
+_ba 244
+as&i 242
+as&i; 242
+_de 242
+as& 242
+at 240
+am 240
+nda_ 239
+ar_ 231
+ve_ 230
+rin 230
+_ve_ 228
+_bu 227
+im 227
+&i;r 226
+i;r 226
+ur 221
+g;&i 220
+g;& 220
+&g;& 220
+yo 220
+&g;&i 220
+g;&i; 220
+ul 215
+ak_ 215
+ke 213
+nu 213
+erin 211
+g;i 208
+&g;i 208
+lan 207
+bir 205
+r&i;n 205
+nde 202
+rl 202
+n&i;_ 201
+ko 201
+ca 200
+m_ 197
+rd 196
+t_ 194
+er_ 194
+st 193
+em 193
+_sa 190
+lm 189
+rt 188
+_ü 187
+i;k 187
+ün 187
+ola 187
+&i;k 187
+lerin 185
+ce 185
+'_ 185
+;m 183
+az 183
+rk 182
+yü 182
+;la 181
+_bir 181
+ir_ 180
+n&i;n 180
+ru 180
+lu 180
+;nda_ 178
+e& 177
+_ç 176
+_ha 175
+_ko 173
+esi 171
+_ö 170
+ap 170
+ni_ 168
+tü 167
+den 164
+ind 161
+di_ 161
+be 161
+s&i;n 160
+nin 159
+üz 158
+ri_ 155
+y&i;l 155
+_p 154
+nin_ 153
+&s;_ 152
+_y& 152
+edi 152
+s;_ 152
+_y&i; 152
+_y&i 152
+yl 151
+le_ 151
+inde 150
+eti 150
+ala 150
+&i;&s 149
+ele 149
+i;&s; 149
+;&s; 149
+;&s 149
+i;&s 149
+ek_ 148
+ere 148
+çi 147
+du 145
+ön 145
+z_ 144
+na_ 144
+eri_ 143
+ec 142
+gö 142
+i;&g; 141
+s&i;_ 141
+bir_ 141
+&i;&g 141
+i;&g 141
+ah 141
+;&g; 141
+;&g 141
+_gö 140
+lar_ 140
+eli 140
+a&g; 140
+a&g 140
+dan 140
+ac 140
+iç 140
+an& 140
+u& 139
+;&g;& 138
+_yü 138
+an&i 138
+an&i; 138
+pa 138
+it 137
+_ola 137
+_bir_ 136
+;t 135
+ör 135
+ne_ 135
+ini_ 134
+lma 134
+kan 133
+ab 132
+to 131
+ba& 131
+kar 130
+r&i;_ 130
+_ar 129
+ili 129
+li_ 129
+ki_ 128
+bu_ 127
+anl 127
+dü 127
+ler_ 126
+_ba& 126
+kon 126
+ll 125
+tl 125
+ine 125
+e&g; 124
+e&g 124
+_il 124
+_bu_ 124
+re_ 124
+bil 123
+&s;i 123
+;&i;n 123
+s;i 123
+ede 123
+zd 123
+'' 122
+_da 122
+_'' 122
+_tü 122
+ret 122
+_-_ 121
+_''_ 121
+mas 121
+''_ 121
+- 121
+dan_ 121
+leri_ 121
+;u 121
+_- 121
+ev 121
diff --git a/libtextcat/data/new_fingerprints/lm/ukrainian.lm b/libtextcat/data/new_fingerprints/lm/ukrainian.lm
new file mode 100644
index 000000000000..438bbdabae46
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/ukrainian.lm
@@ -0,0 +1,400 @@
+_
+о
+и
+а
+â••
+в
+н
+Ñ€
+Ñ‚
+е
+д
+л
+к
+у
+п
+_п
+м
+и_
+_в
+з
+â••_
+а_
+б
+о_
+е_
+ь
+г
+ч
+_Ñ‚
+ов
+_з
+_д
+у_
+в╕
+.
+_Ñ
+ли
+ро
+в_
+на
+по
+ш
+ÑŽ
+ти
+й
+ж
+ор
+Ñ…
+_к
+ит
+ин
+щ
+╕д
+"
+_б
+Ñ_
+до
+ви
+ц
+О
+ом
+ко
+_н
+пр
+â•“
+ра
+ни
+С
+._
+то
+од
+ка
+ло
+_по
+ри
+м_
+ÑŽ_
+ки
+_пр
+ал
+н╕
+ть
+_м
+ен
+ол
+ер
+,
+го
+ь_
+ою
+"_
+_до
+ар
+за
+й_
+на_
+_в_
+во
+ил
+бу
+та
+â•–
+_щ
+ав
+ки_
+_â••
+що
+И
+об
+да
+уд
+д╕
+мо
+_бу
+_â••_
+ть_
+Т
+ог
+Р
+_за
+,_
+╕в
+_ц
+╕н
+ою_
+╕л
+ÑÑ‚
+п╕
+де
+ат
+оÑ
+те
+ну
+не
+_що
+_в╕
+в╕д
+_о
+дн
+ти_
+ла
+а╓
+ли_
+ого
+Ñ‚â••
+он
+П
+о╖
+хо
+ик
+_ч
+ле
+_Ñ€
+â•–_
+л╕
+ц╕
+_П
+ом_
+що_
+но
+р╕
+ду
+ить
+_п╕
+ьк
+н╕_
+_"
+же
+з_
+_не
+ÑÑ
+аж
+Я
+З
+Ð’
+_г
+â•“_
+Ñ…_
+_ви
+иÑ
+_то
+оро
+ва
+нн
+_л
+ов╕
+_що_
+ди
+про
+_мо
+ль
+му
+ем
+н_
+ий_
+_Ñ‚â••
+ати
+Я_
+ще
+_про
+К
+оди
+оло
+рт
+ак
+ить_
+ад
+о╖_
+ив
+лÑ
+ий
+_Ñ
+_Ð
+го_
+до_
+_З
+_ка
+п╕д
+нÑ
+_ко
+_на
+че
+чи
+_Ñо
+_ÑÑ‚
+а╓_
+_з_
+же_
+при
+ÑÑ_
+ови
+б╕
+ка_
+╕й
+ого_
+пе
+би
+╕ль
+â••Ñ‚
+к_
+_буд
+ма
+Ñо
+Ñи
+буд
+Ñв
+пов
+оз
+ок
+Л
+_при
+Г
+Д
+оч
+тор
+ур
+га
+уде
+аз
+ел
+ан
+их_
+╕льк
+ити
+ен╕
+к╕
+ому
+их
+ен╕_
+льк
+_до_
+_Ñ…
+ве
+ОС
+му_
+_п╕д
+не_
+ннÑ
+зн
+буде
+ча
+ому_
+али
+â••Ñ
+ц╕_
+ин_
+_буде
+â••Ñ€
+â••Ñ
+ннÑ_
+д_
+!
+чен
+Ñ„
+Й
+ов╕д
+_пра
+дов
+льки
+ув
+ру
+ре
+гр
+_пер
+_не_
+╕да
+тер
+рон
+Й_
+енн
+рн
+пер
+им
+ши
+╕льки
+ла_
+льки_
+шо
+ба
+_й
+."
+в╕р
+_т╕ль
+ход
+ьки
+_Ñоб
+иш
+дем
+Ñто
+_С
+_К
+ви_
+арти
+_Ñто
+_Ð’
+СТ
+вÑ
+нк
+вч
+вÑ
+дно
+_л╕
+т╕л
+!_
+_ки
+_у
+╕й_
+ван
+ьки_
+нÑ_
+т╕ль
+чо
+рти
+бит
+еннÑ
+ину
+_Ñв
+вин
+д╕_
+пра
+ну_
+каж
+_па
+_пе
+_за_
+удем
+будем
+т╕льк
+арт
+кою
+ьо
+па
+I
+зна
+але
+щен
+."_
+аже
+пов╕
+за_
+_ро
+_гр
+ми
+_т╕л
+Ñоб
+РО
+_в╕д
+карт
+каже
+*
+ЗÐ
+Ч
+_Ñтор
+╕нк
+ож
+Б
diff --git a/libtextcat/data/new_fingerprints/lm/vietnamese.lm b/libtextcat/data/new_fingerprints/lm/vietnamese.lm
new file mode 100644
index 000000000000..14221268dc1e
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/vietnamese.lm
@@ -0,0 +1,400 @@
+_ 88044
+n 17000
+h 12823
+t 9071
+i 8490
+c 8394
+g 8035
+ng 6718
+_t 6352
+_c 5234
+a 5083
+g_ 4883
+ng_ 4882
+_n 4379
+n_ 4365
+i_ 4365
+u 4149
+m 3648
+à 3635
+nh 3480
+o 3451
+Ç 3193
+_Ç 3168
+r 3011
+l 2692
+Ü 2659
+, 2328
+,_ 2295
+c_ 2279
+_l 2263
+ch 2226
+v 2161
+th 2158
+a_ 2132
+_th 2113
+y 2111
+. 2107
+t_ 2106
+h_ 2074
+_v 2057
+_h 1888
+_m 1834
+_ch 1813
+s 1749
+nh_ 1715
+u_ 1679
+á 1668
+à_ 1649
+tr 1611
+b 1589
+_tr 1581
+_nh 1541
+_b 1530
+m_ 1505
+p 1483
+._ 1455
+k 1429
+_s 1415
+o_ 1380
+y_ 1371
+_k 1367
+_ng 1348
+© 1343
+e 1296
+ó 1208
+‰ 1199
+ô 1181
+¶ 1139
+Ã¥ 1132
+T 1126
+hi 1121
+â 1081
+ì 1058
+ê 1055
+d 1040
+_g 1039
+kh 1034
+_kh 1027
+_T 994
+©i 967
+là 943
+_là 941
+" 926
+ä 916
+Ü© 901
+an 893
+_d 888
+Ó 877
+©i_ 859
+on 853
+Ã 848
+N 846
+Š 820
+ó_ 813
+Π801
+ôn 795
+ph 776
+_p 772
+_ph 752
+û 752
+§ 734
+hÜ 728
+ho 715
+và 709
+_và 703
+gÜ 702
+® 700
+_r 696
+H 693
+_là_ 691
+là_ 691
+Ü©i 684
+àn 679
+ên 677
+ông 676
+ha 672
+gi 663
+C 656
+_gi 655
+Ö 654
+gܩi 646
+gܩ 646
+Ü©i_ 624
+ông_ 610
+ngÜ 610
+_ngÜ 608
+ác 606
+ú 601
+ngܩ 600
+ngܩi 600
+_ngܩ 598
+gܩi_ 594
+ûa 587
+cû 586
+_cû 585
+¶t 585
+cûa 585
+ên_ 585
+_cûa 584
+có 583
+ûa_ 582
+_có 581
+cûa_ 581
+ã 581
+_cûa_ 580
+¶t_ 578
+_N 574
+có_ 574
+» 573
+_có_ 572
+iΠ568
+À 562
+‹ 562
+m¶ 557
+_C 553
+_m¶ 551
+p_ 540
+Æ 540
+m¶t 538
+m¶t_ 534
+_m¶t 532
+ìn 528
+_m¶t_ 528
+ti 526
+i‰ 525
+Ù 517
+ÃŽ 512
+ình 500
+.. 500
+† 497
+và_ 497
+_và_ 496
+æ 491
+q 490
+qu 490
+_H 487
+_q 484
+_qu 484
+ong 481
+ong_ 471
+há 471
+x 470
+hô 468
+¿ 466
+_" 460
+Ü® 456
+ro 453
+ình_ 445
+ì_ 434
+_x 434
+ã_ 427
+í 423
+_ti 423
+in 422
+ân 421
+"_ 418
+iŠ 415
+Ön 413
+ron 412
+V 411
+rong 410
+§i 410
+rong_ 409
+ác_ 405
+ª 400
+‰n 399
+hôn 398
+Ã¥i 395
+ay 390
+_V 387
+h» 382
+hà 380
+ày 377
+Ãt 376
+uy 374
+ÇÜ 374
+_ÇÜ 373
+§i_ 372
+cá 371
+_cá 367
+nà 366
+‹_ 366
+hú 366
+_nà 365
+ra 363
+hông 362
+ho_ 359
+ân_ 356
+án 356
+° 355
+Ã¥i_ 354
+ai 352
+hu 352
+cho 352
+ܮc 351
+®c 351
+Ón 351
+_cho 349
+tro 347
+‰t 347
+ào 347
+_tro 346
+_tron 346
+ñ 346
+tron 346
+trong 346
+M 345
+khô 340
+âu 338
+_khô 338
+cho_ 337
+_cho_ 336
+hông_ 336
+ay_ 333
+ch_ 332
+Çã 331
+Ãt_ 331
+( 331
+_( 330
+Ã’ 329
+_Çã 329
+ÇÜ® 328
+_ÇÜ®c 328
+) 328
+ÇÜ®c 328
+_ÇÜ® 328
+khôn 324
+_ñ 324
+_khôn 322
+Çã_ 322
+_Çã_ 320
+ò 318
+Ã¥n 318
+không 316
+ào_ 316
+ܮc_ 316
+®c_ 316
+nhÜ 315
+Ü_ 315
+»ng 313
+»ng_ 313
+»n 313
+_nhÜ 313
+Th 312
+hì 311
+Û 310
+h»n 310
+h»ng_ 310
+h»ng 310
+iê 309
+gh 309
+Šu 307
+ta 307
+anh 307
+¡ 307
+ai_ 306
+àng 306
+ày_ 304
+ÇÜ®c_ 298
+ÜÖ 295
+S 295
+: 294
+‰t_ 294
+e_ 294
+:_ 290
+ÃŽ_ 289
+ua 288
+æn 288
+ài 286
+Šu_ 285
+_nh» 285
+nh» 285
+nh»ng 285
+nh»n 285
+_nh»n 285
+_Th 285
+‰n_ 284
+ÂŒn 283
+Ø 281
+_M 281
+A 281
+úc 278
+L 277
+ø 277
+ÜÖn 276
+_ha 276
+n, 275
+Öng 275
+ÜÖng 275
+an_ 272
+ài_ 271
+iŠu 269
+sÓ 269
+n,_ 268
+¿_ 268
+on_ 267
+_sÓ 267
+các 265
+àng_ 265
+_các 264
+anh_ 264
+ngh 264
+_ta 263
+hi_ 262
+hàn 261
+Š_ 261
+âu_ 261
+Àn 260
+ù 260
+_ngh 257
+ia 255
+¢ 252
+... 252
+êu 251
+Ùc 251
+i, 249
+iŠu_ 248
+nhi 247
+B 246
+i,_ 245
+Óng 244
+ª_ 244
+co 244
+_nhi 244
+Â¥ 244
+ܧ 244
+iên 243
+D 243
+Tr 241
+_S 240
+hÜ_ 239
+òn 237
+hà 236
+hÆ 233
+K 233
+Öng_ 232
+ôi 232
+ÜÖng_ 232
+Àn_ 231
+_co 231
+ÂŒn_ 229
+hå 229
+äi 229
+yê 229
+Ûn 229
+¢n 228
+Ûng 228
+_L 227
+Ûng_ 226
+inh 226
+Çi 225
+mà 225
+_Çi 225
+ng, 224
+ang 224
+P 224
+Šn 224
+g, 224
+g,_ 223
+_v§ 223
diff --git a/libtextcat/data/new_fingerprints/lm/welsh.lm b/libtextcat/data/new_fingerprints/lm/welsh.lm
new file mode 100644
index 000000000000..c25d4a410bef
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/welsh.lm
@@ -0,0 +1,400 @@
+_ 78614
+a 18113
+d 17890
+y 16837
+e 14108
+n 13902
+r 13869
+i 12928
+o 9972
+l 9188
+h 7454
+g 7165
+w 7010
+t 6296
+f 6080
+u 5400
+n_ 5159
+_y 5051
+dd 4946
+s 4884
+r_ 4547
+m 4364
+d_ 4223
+yn 4082
+_a 4020
+c 3707
+th 3301
+u_ 2900
+yd 2900
+ae 2649
+l_ 2588
+_g 2547
+. 2460
+' 2458
+._ 2408
+wy 2334
+yn_ 2331
+_d 2307
+b 2267
+ym 2262
+ra 2261
+an 2235
+ar 2228
+ol 2218
+ia 2212
+i_ 2199
+ed 2191
+_yn 2038
+_i 2020
+y_ 1983
+_c 1934
+_yn_ 1861
+ei 1815
+it 1796
+g_ 1787
+ith 1779
+dd_ 1745
+h_ 1741
+ydd 1731
+ai 1709
+au 1708
+'r_ 1655
+'r 1655
+_y_ 1621
+di 1615
+, 1613
+,_ 1608
+da 1585
+od 1546
+_o 1535
+ad 1532
+th_ 1510
+ll 1494
+au_ 1492
+_s 1415
+er 1359
+o_ 1350
+io 1311
+a_ 1299
+ol_ 1294
+_e 1273
+_h 1249
+cy 1246
+yr 1243
+en 1228
+p 1217
+_f 1212
+_cy 1203
+mr 1200
+gy 1193
+ymr 1185
+ha 1172
+hy 1164
+eg 1159
+do 1151
+edd 1147
+G 1134
+we 1107
+C 1089
+_ar 1078
+_m 1075
+yf 1057
+de 1048
+fe 1047
+_G 1045
+ch 1043
+rae 1023
+el 1016
+no 1010
+ni 1010
+_b 1009
+ne 1001
+ef 992
+ri 983
+et 978
+_C 957
+_gy 952
+_a_ 943
+eth 943
+_. 940
+_._ 939
+eit 930
+eith 927
+ys 921
+wyd 912
+ga 901
+_i_ 893
+s_ 887
+mrae 878
+mraeg 878
+raeg 878
+ymrae 878
+aeg 878
+ymra 878
+mra 878
+_p 856
+aet 850
+aeth 850
+_n 844
+un 838
+on 836
+ait 833
+aith 833
+nn 830
+at 827
+oe 823
+li 805
+_r 801
+ddi 795
+gw 789
+ma 783
+le 777
+nt 772
+ho 769
+ff 766
+yr_ 764
+na 753
+la 748
+rh 747
+eg_ 734
+si 733
+ng 732
+dol 732
+ro 718
+al 712
+_dd 706
+wn 705
+oed 705
+Gy 704
+dy 701
+Cy 696
+o' 692
+ar_ 691
+ny 690
+Gym 678
+wr 677
+id 676
+_Gy 672
+_o_ 668
+Gymr 667
+if 662
+ith_ 662
+_ar_ 660
+iad 657
+_w 657
+fo 656
+eu 655
+aeg_ 650
+raeg_ 650
+aith_ 648
+or 648
+_Gym 646
+fa 642
+re 638
+_Cy 638
+_Gymr 635
+_gw 633
+fy 633
+oedd 633
+edd_ 629
+rd 627
+od_ 622
+ac 619
+ddo 612
+an_ 607
+Gymra 606
+er_ 605
+A 604
+eth_ 601
+hw 596
+ydd_ 591
+o'r 586
+o'r_ 586
+es 583
+ir 579
+dw 573
+go 559
+yl 548
+rw 545
+aeth_ 545
+wydd 543
+aw 539
+_rh 539
+dr 537
+ly 537
+fn 534
+dau 533
+_hy 531
+t_ 531
+sg 529
+'n 529
+* 528
+'n_ 523
+_* 522
+nyd 521
+nydd 521
+M 519
+st 518
+Y 516
+sy 515
+yd_ 513
+lw 512
+_ga 508
+iai 503
+il 502
+_l 499
+rt 494
+ad_ 493
+_yr 493
+_yr_ 492
+as 492
+dol_ 492
+f_ 491
+dda 491
+ig 490
+og 484
+wa 479
+he 478
+iaith 477
+iait 477
+oedd_ 475
+_ma 473
+c_ 472
+Cym 470
+te 469
+_ym 467
+am 467
+_M 465
+_ia 462
+efn 462
+i' 460
+ie 458
+_Cym 458
+_ac 457
+dau_ 456
+yw 455
+ew 453
+fr 441
+fod 441
+_A 441
+du 437
+_sy 434
+e_ 432
+wi 426
+Cymr 426
+se 425
+B 424
+D 424
+_Cymr 423
+bl 423
+lu 420
+in 417
+_t 417
+tr 414
+ac_ 413
+wed 410
+os 410
+_iai 407
+_iait 407
+el_ 405
+_ac_ 405
+rha 404
+m_ 404
+is 403
+on_ 401
+eu_ 393
+hi 393
+rdd 393
+id_ 389
+_Y 388
+ry 387
+odd 387
+rwy 387
+rf 386
+io_ 380
+ynn 380
+cyf 380
+hr 380
+_cyf 379
+yddi 379
+cyn 372
+_de 372
+rth 371
+ru 368
+S 363
+wei 363
+ysg 362
+_B 362
+u' 361
+yddo 360
+wn_ 360
+so 359
+dio 359
+_ei 358
+N 356
+dwy 355
+_da 353
+me 353
+gan 353
+gyf 353
+w_ 352
+_o' 351
+fer 349
+nol 347
+hyn 346
+ddy 346
+af 346
+ta 343
+ddol 343
+_fe 340
+nd 340
+mae 338
+_cyn 338
+efnyd 337
+fnydd 337
+fny 337
+efny 337
+fnyd 337
+iad_ 335
+_mae 333
+ion 333
+_ll 330
+def 330
+_gyf 327
+nt_ 326
+i'r_ 326
+- 326
+i'r 326
+weith 325
+weit 325
+defn 325
+defny 325
+bo 324
+hyd 323
+by 322
+_si 321
+ir_ 321
+hau 318
+nod 318
+edi 315
+I 314
+fyd 313
+wyr 313
+ada 311
+ddio 310
+rif 309
+sia 307
+sa 306
+fel 305
+tha 305
+_S 302
+_ne 302
+_u 301
+fod_ 300
+_o'r 300
+yg 300
+_o'r_ 300
+_i' 299
+ge 299
+dia 299
diff --git a/libtextcat/data/new_fingerprints/lm/yiddish_utf.lm b/libtextcat/data/new_fingerprints/lm/yiddish_utf.lm
new file mode 100644
index 000000000000..e3386a3c1d86
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/yiddish_utf.lm
@@ -0,0 +1,400 @@
+× 29767
+_ 13552
+_× 6516
+Ö 4273
+ 3670
+× 3670
+×¢ 3261
+¢ 3261
+¢× 2826
+×¢× 2826
+×Ö 2785
+Ö 2785
+×™ 2565
+™ 2565
+¨ 2082
+ר 2082
+™× 2062
+×™× 2062
+˜ 1857
+ט 1857
+· 1828
+Ö· 1828
+Ÿ 1793
+ן 1793
+_× 1537
+Ÿ_ 1532
+ן_ 1532
+·× 1527
+Ö·× 1527
+×Ö· 1517
+Ö· 1517
+×¨× 1355
+¨× 1355
+œ 1270
+ל 1270
+¸ 1268
+×Ö¸ 1268
+Ö¸ 1268
+Ö¸ 1268
+Ö·× 1240
+×Ö·× 1240
+×  1232
+  1232
+×Ö¸× 1229
+Ö¸× 1229
+¸× 1229
+Ö¸× 1229
+× × 1181
+ × 1181
+ו 1116
+• 1116
+“ 1031
+ד 1031
+×˜× 1012
+˜× 1012
+×œ× 1007
+œ× 1007
+×•× 975
+•× 975
+×“× 965
+“× 965
+×¤Ö 929
+¤ 929
+פ 929
+¤Ö 929
+¢×¨ 873
+ער 873
+×× 870
+× 870
+×– 868
+– 868
+¡ 832
+ס 832
+§ 797
+ק 797
+_×× 784
+מ 766
+ž 766
+_×Ö 752
+ž× 747
+×ž× 747
+’ 742
+×’ 742
+© 725
+ש 725
+ט_ 699
+˜_ 699
+×° 691
+° 691
+×°× 674
+°× 674
+² 667
+ײ 667
+’× 664
+×’× 664
+¿ 661
+Ö¿ 661
+×§× 654
+§× 654
+¿× 652
+Ö¿× 652
+×©× 651
+©× 651
+–× 643
+×–× 643
+פֿ 641
+¤Ö¿ 641
+×¤Ö¿× 638
+¤Ö¿× 638
+_×Ö· 638
+ר_ 592
+¨_ 592
+_ד 560
+_×“× 554
+, 551
+,_ 534
+ב 517
+‘ 517
+_×¤Ö 502
+_פ 502
+×‘× 472
+‘× 472
+×¡× 436
+¡× 436
+צ 436
+¦ 436
+×¦× 433
+¦× 433
+_×° 430
+_×°× 429
+” 410
+×” 410
+¢×¨_ 404
+_פֿ 404
+ער_ 404
+_מ 400
+_×ž× 396
+×’×¢ 390
+’×¢ 390
+_×– 390
+_×–× 386
+’×¢× 384
+ען 384
+×’×¢× 384
+¢×Ÿ 384
+×¢×¨× 382
+¢×¨× 382
+. 374
+××™ 372
+×™ 372
+Ö¼ 366
+¼ 366
+”× 363
+×”× 363
+×™× 362
+××™× 362
+_×’ 359
+×± 357
+± 357
+_×’× 356
+ון 349
+•×Ÿ 349
+×™_ 346
+™_ 346
+×¢_ 343
+¢_ 343
+•×Ÿ_ 337
+ון_ 337
+“×¢ 334
+דע 334
+_××™ 331
+ך 330
+š 330
+“×¢× 323
+¼× 323
+×“×¢× 323
+Ö¼× 323
+²× 322
+×²× 322
+ען_ 321
+¢×Ÿ_ 321
+±× 320
+×±× 320
+“×™ 317
+די 317
+²Ö· 311
+ַר 311
+×²Ö 311
+·×¨ 311
+ַר 311
+ײַ 311
+²Ö 311
+לע 310
+œ×¢ 310
+._ 307
+- 305
+·_ 297
+Ö·_ 297
+› 294
+×› 294
+ ×¢ 290
+× ×¢ 290
+ס_ 289
+¡_ 289
+פּ 288
+¤Ö¼ 288
+×²Ö·× 287
+²Ö·× 287
+ך_ 283
+š_ 283
+Ö·_ 274
+×Ö·_ 274
+Ö·×¨× 272
+·×¨× 272
+×œ×¢× 261
+œ×¢× 261
+_×”× 260
+_×” 260
+_צ 259
+™×© 257
+יש 257
+_×¦× 256
+×–×™ 254
+–×™ 254
+×¤Ö¼× 252
+¤Ö¼× 252
+מע 248
+ž×¢ 248
+_ק 247
+× ×¢× 247
+ ×¢× 247
+טע 245
+˜×¢ 245
+_×§× 245
+×ו 244
+×•× 244
+ו 244
+××•× 244
+¢×œ 238
+על 238
+”× 235
+×”× 235
+×”×Ö 235
+שט 235
+©×˜ 235
+”×Ö 235
+×›× 234
+›× 234
+_×‘× 231
+“ער 231
+_ב 231
+Ö·×  229
+·×  229
+·× × 228
+Ö·× × 228
+_×ו 227
+_×’×¢ 226
+ 219
+× 219
+" 218
+ž×¢× 217
+×ž×¢× 217
+°×¢ 217
+×°×¢ 217
+·×œ 216
+ַל 216
+_×–×™ 215
+_×  215
+×¢×œ× 212
+¢×œ× 212
+¨× 211
+×¨× 211
+_× × 211
+×˜×¢× 211
+˜×¢× 211
+×°× 210
+°× 210
+_דע 210
+°×Ö 209
+×°×¢× 209
+×°×Ö 209
+°×¢× 209
+™×©× 208
+×™×©× 208
+™×§ 207
+יק 207
+ר×Ö 206
+¨×Ö 206
+–_ 205
+×–_ 205
+ž×™ 196
+מי 196
+_ש 195
+×ž×™× 195
+ž×™× 195
+ַל 193
+_×©× 191
+Ö¿× 189
+¿× 189
+¤Ö¿× 189
+Ö¿×Ö 188
+¿×• 188
+Ö¿×•× 188
+ֿו 188
+¿×•× 188
+¿×Ö 188
+ון 187
+”×Ö¸ 186
+_×”× 185
+¤Ö¿×• 184
+_×¢ 179
+_די 178
+˜× 176
+×˜× 176
+ט×Ö 175
+˜×Ö 175
+ָס 174
+ָס 174
+יט 174
+™×˜ 174
+¸×¡ 174
+ל_ 173
+œ_ 173
+“×™_ 173
+די_ 173
+×_ 171
+·×œ× 171
+_ 171
+Ö·×œ× 171
+_×¢× 171
+ָט 169
+¸×˜ 169
+ָט 169
+יך 168
+™×š 168
+ָר 166
+–×™× 166
+ָר 166
+¸×¨ 166
+×–×™× 166
+× ×™ 164
+×¢×  164
+ ×™ 164
+¢×  164
+¨×™ 163
+רי 163
+יך_ 163
+™×š_ 163
+°×Ö¸ 162
+×¢× × 162
+¢× × 162
+¿×Ö· 160
+¢×˜ 160
+עט 160
+_×™ 158
+¨×™× 157
+™×¨ 157
+×¨×™× 157
+יר 157
+-× 156
+ָס_ 155
+¸×¡_ 155
+œ×™ 154
+_מי 154
+לי 154
+קע 153
+ונ 153
+§×¢ 153
+•×  153
+_×°× 152
+ ×™× 152
+× ×™× 152
+™×Ÿ 151
+ין 151
+××± 151
+×± 151
+×™Ö 150
+·×˜ 150
+´ 150
+™Ö 150
+×™Ö´ 150
+™Ö´ 150
+Ö´ 150
+ַט 150
+Ö´× 149
+™Ö´× 149
+´× 149
+×™Ö´× 149
+œ×™× 148
+_×™× 148
+×œ×™× 148
+×±× 146
+××±× 146
+Ö·×  146
+_××± 145
+¿×•×Ÿ 145
+×™×– 143
+™×– 143
+“×™× 142
+×“×™× 142
diff --git a/libtextcat/data/new_fingerprints/lm/zulu.lm b/libtextcat/data/new_fingerprints/lm/zulu.lm
new file mode 100644
index 000000000000..f30c09ced93f
--- /dev/null
+++ b/libtextcat/data/new_fingerprints/lm/zulu.lm
@@ -0,0 +1,400 @@
+_
+a
+e
+i
+n
+u
+o
+l
+k
+h
+s
+a_
+b
+t
+m
+g
+w
+z
+e_
+i_
+ng
+ku
+d
+y
+la
+an
+_n
+th
+le
+_u
+o_
+el
+ba
+_k
+en
+in
+wa
+p
+_e
+zi
+.
+hi
+si
+al
+ha
+uk
+ab
+_i
+r
+is
+ka
+_a
+kh
+we
+li
+ni
+ma
+_ng
+he
+ul
+._
+ga
+thi
+la_
+be
+ak
+c
+on
+nd
+na
+ok
+am
+lo
+ho
+,
+se
+ph
+hi_
+ut
+es
+nga
+_ku
+,_
+ez
+thi_
+un
+uth
+le_
+uku
+hu
+f
+u_
+um
+ek
+ne
+go
+q
+_uk
+at
+aba
+_l
+sh
+lu
+M
+_uku
+ol
+_b
+hl
+ni_
+ngo
+kw
+-
+N
+ik
+oku
+em
+nt
+as
+ge
+az
+ya
+iz
+sa
+_o
+S
+uthi
+A
+za
+_w
+wa_
+_s
+mb
+kut
+kuth
+ela
+ye
+_y
+uthi_
+il
+ay
+ele
+ba_
+I
+dl
+nge
+ath
+ub
+ke
+U
+zo
+na_
+yi
+us
+kuthi
+esi
+ob
+v
+om
+ama
+it
+lo_
+bu
+L
+ezi
+j
+ny
+im
+ing
+li_
+_ab
+eni
+no
+de
+ela_
+ze
+ang
+ko
+ala
+lw
+yo
+zin
+_U
+lel
+eng
+mi
+_ngo
+eb
+uz
+me
+gi
+ti
+ukut
+so
+ukuth
+bo
+da
+_ba
+nz
+_aba
+the
+eli
+akh
+eni_
+E
+ban
+s_
+aka
+_kw
+ma_
+ap
+_ukut
+he_
+ini
+di
+K
+ka_
+ib
+kwa
+ulu
+ele_
+kho
+nj
+bi
+_z
+khu
+we_
+lal
+enz
+ho_
+et
+C
+gu
+zi_
+and
+hla
+ngi
+pha
+_um
+_ka
+isi
+_nge
+isa
+aph
+ung
+izi
+dla
+ala_
+zw
+nde
+to
+n_
+ne_
+nk
+ke_
+_I
+athi
+_no
+lan
+_wa
+kul
+B
+ind
+fu
+wen
+ikh
+azi
+ule
+kub
+e.
+_S
+x
+o.
+ona
+kha
+_iz
+je
+bh
+_M
+er
+kwe
+oba
+ane
+O
+_N
+sa_
+a.
+lwa
+_ez
+kus
+ki
+mu
+od
+"
+ebe
+P
+_nga
+hul
+_m
+ase
+ben
+_be
+T
+ic
+nda
+_si
+_na
+/
+ant
+ngu
+ad
+anga
+nje
+ith
+a._
+ye_
+athi_
+R
+os
+alo
+tha
+za_
+eth
+_es
+uma
+ana
+ile
+te
+ale
+aban
+:
+_A
+oba_
+hat
+kun
+ha_
+phe
+be_
+ali
+_am
+si_
+wo
+uy
+sik
+ise
+kan
+hath
+dlal
+_ne
+zwe
+aw
+han
+tu
+nye
+qe
+_ko
+ah
+hel
+thu
+isa_
+gob
+_K
+_lo
+ta
+_ama
+ot
+ula
+_em
+ze_
+i.
+ngob
+_izi
+hol
+ar
+ani
+ole
+uba
+_in
+up
+eka
+ini_
+goba
+tho
+hon
+_ezi
+ona_
+ezin
+ngoba
+lu_
+goba_
+ip
+a,
+eli_
+t_
+nya
+ndl
+sha
+_is
+the_
+i._
+amb
diff --git a/libtextcat/libtextcat-2.2.patch b/libtextcat/libtextcat-2.2.patch
new file mode 100644
index 000000000000..ca7a26cabfbf
--- /dev/null
+++ b/libtextcat/libtextcat-2.2.patch
@@ -0,0 +1,4078 @@
+--- misc/libtextcat-2.2/configure Thu May 22 13:39:55 2003
++++ misc/build/libtextcat-2.2/configure Mon Mar 31 11:29:14 2008
+@@ -3451,7 +3451,7 @@
+ ;;
+
+ # This must be Linux ELF.
+-linux-gnu*)
++linux-gnu*|k*bsd*-gnu*)
+ case $host_cpu in
+ alpha* | hppa* | i*86 | mips | mipsel | powerpc* | sparc* | ia64*)
+ lt_cv_deplibs_check_method=pass_all ;;
+@@ -5391,7 +5391,8 @@
+ allow_undefined_flag=
+ no_undefined_flag=
+ need_lib_prefix=unknown
+-need_version=unknown
++#need_version=unknown
++need_version=no
+ # when you set need_version to no, make sure it does not cause -set_version
+ # flags to be left without arguments
+ archive_cmds=
+@@ -5785,7 +5786,7 @@
+ # cross-compilation, but unfortunately the echo tests do not
+ # yet detect zsh echo's removal of \ escapes. Also zsh mangles
+ # `"' quotes if we put them in here... so don't!
+- archive_cmds='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs && $CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib ${lib}-master.o $deplibs$linker_flags $(test .$module != .yes && echo -install_name $rpath/$soname $verstring)'
++ archive_cmds='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs && $CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib ${lib}-master.o $deplibs$compiler_flags $(test .$module != .yes && echo -install_name $rpath/$soname $verstring)'
+ # We need to add '_' to the symbols in $export_symbols first
+ #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols'
+ hardcode_direct=yes
+@@ -6280,7 +6281,7 @@
+ ;;
+
+ freebsd*)
+- objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout`
++ objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo elf`
+ version_type=freebsd-$objformat
+ case $version_type in
+ freebsd-elf*)
+@@ -6365,7 +6365,7 @@
+ ;;
+
+ # This must be Linux ELF.
+-linux-gnu*)
++linux-gnu*|k*bsd*-gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+--- misc/libtextcat-2.2/src/Makefile.in Thu May 22 13:39:52 2003
++++ misc/build/libtextcat-2.2/src/Makefile.in Mon Mar 31 11:29:14 2008
+@@ -124,20 +124,20 @@
+ target_vendor = @target_vendor@
+ AUTOMAKE_OPTIONS = 1.4 foreign
+
+-WARNS = -W -Wall -Wshadow -Wpointer-arith
+-IFLAGS =
+-FLAGS = -g -O3 -funroll-loops -D_THREAD_SAFE -D_GNU_SOURCE
++#WARNS = -W -Wall -Wshadow -Wpointer-arith
++IFLAGS =
++#FLAGS = -g -O3 -funroll-loops -D_THREAD_SAFE -D_GNU_SOURCE
+ VERBOSE = -DVERBOSE
+ AM_CFLAGS = $(IFLAGS) $(VERBOSE) $(WARNS) $(FLAGS)
+ AM_LDFLAGS = -g
+
+ noinst_HEADERS = \
+- common.h constants.h fingerprint.h textcat.h wg_mempool.h
++ common.h constants.h fingerprint.h textcat.h wg_mempool.h utf8misc.h
+
+
+ lib_LTLIBRARIES = libtextcat.la
+ libtextcat_la_SOURCES = \
+- common.c fingerprint.c textcat.c wg_mempool.c
++ common.c fingerprint.c textcat.c wg_mempool.c utf8misc.c
+
+
+ bin_PROGRAMS = createfp
+@@ -156,7 +156,7 @@
+ libtextcat_la_LDFLAGS =
+ libtextcat_la_LIBADD =
+ am_libtextcat_la_OBJECTS = common.lo fingerprint.lo textcat.lo \
+- wg_mempool.lo
++ wg_mempool.lo utf8misc.lo
+ libtextcat_la_OBJECTS = $(am_libtextcat_la_OBJECTS)
+ bin_PROGRAMS = createfp$(EXEEXT)
+ noinst_PROGRAMS = testtextcat$(EXEEXT)
+@@ -177,7 +177,8 @@
+ @AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/common.Plo ./$(DEPDIR)/createfp.Po \
+ @AMDEP_TRUE@ ./$(DEPDIR)/fingerprint.Plo \
+ @AMDEP_TRUE@ ./$(DEPDIR)/testtextcat.Po ./$(DEPDIR)/textcat.Plo \
+-@AMDEP_TRUE@ ./$(DEPDIR)/wg_mempool.Plo
++@AMDEP_TRUE@ ./$(DEPDIR)/wg_mempool.Plo \
++@AMDEP_TRUE@ ./$(DEPDIR)/utf8misc.Plo
+ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+ LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+@@ -213,7 +214,7 @@
+ @rm -f stamp-h1
+ cd $(top_builddir) && $(SHELL) ./config.status src/config.h
+
+-$(srcdir)/config.h.in: $(top_srcdir)/configure.ac $(ACLOCAL_M4)
++$(srcdir)/config.h.in: $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOHEADER)
+ touch $(srcdir)/config.h.in
+
+@@ -247,8 +248,8 @@
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+-libtextcat.la: $(libtextcat_la_OBJECTS) $(libtextcat_la_DEPENDENCIES)
+- $(LINK) -rpath $(libdir) $(libtextcat_la_LDFLAGS) $(libtextcat_la_OBJECTS) $(libtextcat_la_LIBADD) $(LIBS)
++libtextcat.la: $(libtextcat_la_OBJECTS) $(libtextcat_la_DEPENDENCIES)
++ $(LINK) -avoid-version -rpath $(libdir) $(libtextcat_la_LDFLAGS) $(libtextcat_la_OBJECTS) $(libtextcat_la_LIBADD) $(LIBS)
+ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+ install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+@@ -285,10 +286,10 @@
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+-createfp$(EXEEXT): $(createfp_OBJECTS) $(createfp_DEPENDENCIES)
++createfp$(EXEEXT): $(createfp_OBJECTS) $(createfp_DEPENDENCIES)
+ @rm -f createfp$(EXEEXT)
+ $(LINK) $(createfp_LDFLAGS) $(createfp_OBJECTS) $(createfp_LDADD) $(LIBS)
+-testtextcat$(EXEEXT): $(testtextcat_OBJECTS) $(testtextcat_DEPENDENCIES)
++testtextcat$(EXEEXT): $(testtextcat_OBJECTS) $(testtextcat_DEPENDENCIES)
+ @rm -f testtextcat$(EXEEXT)
+ $(LINK) $(testtextcat_LDFLAGS) $(testtextcat_OBJECTS) $(testtextcat_LDADD) $(LIBS)
+
+@@ -304,6 +305,7 @@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testtextcat.Po@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textcat.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wg_mempool.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf8misc.Plo@am__quote@
+
+ distclean-depend:
+ -rm -rf ./$(DEPDIR)
+--- misc/libtextcat-2.2/src/common.c Thu May 22 13:32:43 2003
++++ misc/build/libtextcat-2.2/src/common.c Mon Mar 31 11:29:14 2008
+@@ -3,23 +3,23 @@
+ *
+ * Copyright (c) 2003, WiseGuys Internet B.V.
+ * All rights reserved.
+- *
++ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+- *
++ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+- *
++ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+- *
++ *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+- *
++ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@@ -114,11 +114,11 @@
+ wgmem_error( "Error while strduping %u bytes.\n", strlen(s) );
+ }
+
+- return( result );
++ return( result );
+ }
+
+-extern void* wg_realloc( void *ptr, size_t size )
+-{
++extern void* wg_realloc( void *ptr, size_t size )
++{
+ void *result;
+
+ if (!size) {
+@@ -131,7 +131,7 @@
+ wgmem_error( "Error while reallocing %u bytes.\n", size );
+ }
+
+- return( result );
++ return( result );
+ }
+
+ extern void wg_free( void *mem )
+@@ -148,12 +148,12 @@
+ if ( fgets(line, size, fp) == NULL ) {
+ return NULL;
+ }
+-
++
+ /** kill term null **/
+ if ( (p = strpbrk( line, "\n\r" )) ) {
+ *p = '\0';
+- }
+-
++ }
++
+ return line;
+ }
+
+@@ -164,39 +164,39 @@
+ *
+ * ARGUMENTS:
+ * - result:
+- *
++ *
+ * After the split, this array contains pointers to the start of each
+ * detected segment. Must be preallocated and at least as large as
+ * maxsegments. The pointers point into the dest buffer.
+- *
+- * - dest:
+- *
++ *
++ * - dest:
++ *
+ * String into which result points as an index. Must be preallocated, and
+ * at least as big as src. You can use src as dest, but in that case src
+ * is overwritten!
+- *
+- * - src:
+- *
++ *
++ * - src:
++ *
+ * The string to split. Sequences of whitespace are treated as separators, unless
+ * escaped. There are two ways to escape: by using single quotes (anything
+ * between single quotes is treated as one segment), or by using a backslash
+ * to escape the next character. The backslash escape works inside quotation
+ * as well.
+- *
++ *
+ * Example:
+- *
++ *
+ * "It\'s very\ easy 'to use WiseGuys\' wg_split()' function" is split into:
+- *
++ *
+ * "It's"
+ * "very easy"
+ * "to use WiseGuys' wg_split()"
+ * "function"
+- *
+- * - maxsegments:
+- *
++ *
++ * - maxsegments:
++ *
+ * The maximum number of segments. If the splitter runs out of segments,
+ * the remainder of the string is stored in the last segment.
+- *
++ *
+ * RETURN VALUE:
+ * The number of segments found.
+ */
+@@ -218,12 +218,12 @@
+ switch (state) {
+ case 0:
+ /*** Skip spaces ***/
+- while ( isspace((int) *p) ) {
++ while ( isspace((unsigned char) *p) ) {
+ p++;
+ }
+ state = 1;
+
+- case 1:
++ case 1:
+ /*** Start segment ***/
+ result[cnt] = w;
+ cnt++;
+@@ -232,12 +232,12 @@
+ case 2:
+ /*** Unquoted segment ***/
+ while (*p) {
+- if ( isspace((int) *p) ) {
++ if ( isspace((unsigned char) *p) ) {
+ *w++ = '\0';
+ p++;
+ state = 0;
+ break;
+- }
++ }
+ else if ( *p == '\'' ) {
+ /*** Start quotation ***/
+ p++;
+@@ -292,17 +292,17 @@
+ }
+
+
++#ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ extern void wg_timerstart(wgtimer_t *t)
+ {
+-#ifdef HAVE_GETTIMEOFDAY
+ gettimeofday( &(t->start), NULL );
+-#endif
+ }
++#endif /* TL : no struct timeval under Win32 */
+
+
++#ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ extern uint4 wg_timerstop(wgtimer_t *t)
+ {
+-#ifdef HAVE_GETTIMEOFDAY
+ uint4 result;
+ gettimeofday( &(t->stop), NULL );
+ result = (t->stop.tv_sec - t->start.tv_sec) * 1000000 +
+@@ -312,25 +312,23 @@
+ t->start.tv_usec = t->stop.tv_usec;
+
+ return result;
+-#else
+- return 0;
+-#endif
+ }
++#endif /* TL : no struct timeval under Win32 */
+
+
+ /**
+ * wg_strgmov -- a guarded strcpy() variation
+- *
++ *
+ * copies src to dest (including terminating zero), and returns
+ * pointer to position of terminating zero in dest. The function is
+ * guaranteed not to write past destlimit. If the copy couldn't be
+- * finished, the function returns NULL after restoring the first
+- * character in dest for your convenience (since this is usually a zero).
++ * finished, the function returns NULL after restoring the first
++ * character in dest for your convenience (since this is usually a zero).
+ */
+ char *wg_strgmov( char *dest, const char *src, const char *destlimit )
+ {
+ char tmp, *w;
+-
++
+ if ( !dest || dest >= destlimit ) {
+ return NULL;
+ }
+@@ -355,7 +353,7 @@
+ }
+
+ /*
+- * wg_trim() -- remove whitespace surrounding a string.
++ * wg_trim() -- remove whitespace surrounding a string.
+ *
+ * Example: " bla bla bla " becomes "bla bla bla" after trimming.
+ *
+@@ -373,12 +371,12 @@
+ char *lastnonspace = &dest[-1];
+ const char *p = src;
+ char *w = dest;
+-
+- while ( isspace((int)*p) ) {
++
++ while ( isspace((unsigned char)*p) ) {
+ p++;
+ }
+ while (*p) {
+- if ( !isspace((int)*p) ) {
++ if ( !isspace((unsigned char)*p) ) {
+ lastnonspace = w;
+ }
+ *w++ = *p++;
+--- misc/libtextcat-2.2/src/common.h Thu May 22 15:02:29 2003
++++ misc/build/libtextcat-2.2/src/common.h Mon Mar 31 11:29:14 2008
+@@ -1,28 +1,28 @@
+ #ifndef _COMMON_H_
+ #define _COMMON_H_
+ /**
+- * common.h -- a mixed bag of helper functions
++ * common.h -- a mixed bag of helper functions
+ *
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+- *
++ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+- *
++ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+- *
++ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+- *
++ *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+- *
++ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@@ -86,10 +86,12 @@
+ typedef char boole;
+ #endif
+
++#ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ typedef struct wgtimer_s {
+ struct timeval start;
+ struct timeval stop;
+ } wgtimer_t;
++#endif /* TL : no struct timeval under Win32 */
+
+
+ extern void *wg_malloc( size_t size );
+@@ -101,13 +103,15 @@
+
+ extern char *wg_getline( char *line, int size, FILE *fp );
+
++#ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
+ extern void wg_timerstart(wgtimer_t *t);
+ extern uint4 wg_timerstop(wgtimer_t *t);
++#endif /* TL : no struct timeval under Win32 */
+
+ extern unsigned int wg_split( char **result, char *dest, char *src, int maxsegments );
+ extern char *wg_strgmov( char *dest, const char *src, const char *destlimit );
+ extern char *wg_trim( char *dest, const char *src );
+
+-
++
+ #endif
+
+--- misc/libtextcat-2.2/src/constants.h Thu May 22 13:32:43 2003
++++ misc/build/libtextcat-2.2/src/constants.h Mon Mar 31 11:29:14 2008
+@@ -39,6 +39,8 @@
+ */
+ #include <limits.h>
+
++#define _UTF8_
++
+ #define DESCRIPTION "out of place"
+
+ /* Reported matches are those fingerprints with a score less than best
+@@ -59,14 +61,21 @@
+ /* Maximum number of n-grams in a fingerprint */
+ #define MAXNGRAMS 400
+
+-/* Maximum size of an n-gram? */
+-#define MAXNGRAMSIZE 5
++/* Maximum number of characters (symbols) in an n-gram */
++#define MAXNGRAMSYMBOL 5
++
++/* Maximum size in bytes of the string representing an n-gram (must be at least the number of symbols) */
++#ifdef _UTF8_
++#define MAXNGRAMSIZE 20
++#else
++#define MAXNGRAMSIZE MAXNGRAMSYMBOL
++#endif
+
+ /* Which characters are not acceptable in n-grams? */
+-#define INVALID(c) (isspace((int)c) || isdigit((int)c))
++#define INVALID(c) (isspace((unsigned char)c) || isdigit((unsigned char)c))
+
+ /* Minimum size (in characters) for accepting a document */
+-#define MINDOCSIZE 25
++#define MINDOCSIZE 6
+
+ /* Maximum penalty for missing an n-gram in fingerprint */
+ #define MAXOUTOFPLACE 400
+@@ -75,5 +84,8 @@
+ #define TABLEPOW 13
+
+ #define MAXSCORE INT_MAX
++
++/* where the fingerprints files are stored */
++#define DEFAULT_FINGERPRINTS_PATH ""
+
+ #endif
+--- misc/libtextcat-2.2/src/fingerprint.c Thu May 22 13:32:43 2003
++++ misc/build/libtextcat-2.2/src/fingerprint.c Mon Mar 31 11:29:14 2008
+@@ -6,23 +6,23 @@
+ * All rights reserved.
+ *
+ * THE BSD LICENSE
+- *
++ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+- *
++ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+- *
++ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+- *
++ *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+- *
++ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@@ -51,7 +51,7 @@
+ * The reason why we go through the trouble of doing a partial
+ * (heap)sort is that a full quicksort behaves horribly on the data:
+ * most n-grams have a very low count, resulting in a data set in
+- * nearly-sorted order. This causes quicksort to behave very badly.
++ * nearly-sorted order. This causes quicksort to behave very badly.
+ * Heapsort, on the other hand, behaves handsomely: worst case is
+ * Mlog(N) for M n-grams filtered through a N-sized heap.
+ *
+@@ -63,6 +63,10 @@
+ * - put table/heap datastructure in a separate file.
+ */
+
++#ifndef _UTF8_
++#define _UTF8_
++#endif
++
+ #include "config.h"
+ #include <stdio.h>
+ #ifdef HAVE_STDLIB_H
+@@ -80,10 +84,12 @@
+ #include "wg_mempool.h"
+ #include "constants.h"
+
++#include "utf8misc.h"
+
+ #define TABLESIZE (1<<TABLEPOW)
+ #define TABLEMASK ((TABLESIZE)-1)
+
++
+ typedef struct {
+
+ sint2 rank;
+@@ -96,7 +102,7 @@
+ const char *name;
+ ngram_t *fprint;
+ uint4 size;
+-
++
+ } fp_t;
+
+ typedef struct entry_s {
+@@ -105,13 +111,13 @@
+ struct entry_s *next;
+ } entry_t;
+
+-typedef struct table_s {
++typedef struct table_s {
+ void *pool;
+ entry_t **table;
+ entry_t *heap;
+
+ struct table_s *next;
+-
++
+ uint4 heapsize;
+ uint4 size;
+ } table_t;
+@@ -122,7 +128,7 @@
+ * fast and furious little hash function
+ *
+ * (Note that we could use some kind of rolling checksum, and update it
+- * during n-gram construction)
++ * during n-gram construction)
+ */
+ static uint4 simplehash( const char *p, int len )
+ {
+@@ -134,29 +140,14 @@
+ }
+
+
+-/* checks if n-gram lex is a prefix of key and of length len */
+-inline int issame( char *lex, char *key, int len )
+-{
+- int i;
+- for (i=0; i<len; i++) {
+- if ( key[i] != lex[i] ) {
+- return 0;
+- }
+- }
+- if ( lex[i] != 0 ) {
+- return 0;
+- }
+- return 1;
+-}
+-
+
+ /* increases frequency of ngram(p,len) */
+-static inline int increasefreq( table_t *t, char *p, int len )
+-{
+- uint4 hash = simplehash( p, len ) & TABLEMASK;
++static int increasefreq( table_t *t, char *p, int len )
++{
++ uint4 hash = simplehash( p, len ) & TABLEMASK;
+ entry_t *entry = t->table[ hash ];
+-
+- while ( entry ) {
++
++ while ( entry ) {
+ if ( issame( entry->str, p, len ) ) {
+ /*** Found it! ***/
+ entry->cnt++;
+@@ -168,7 +159,7 @@
+ }
+
+ /*** Not found, so create ***/
+- entry = wgmempool_alloc( t->pool, sizeof(entry_t) );
++ entry = (entry_t*)(wgmempool_alloc( t->pool, sizeof(entry_t) ));
+ strcpy( entry->str, p );
+ entry->cnt = 1;
+
+@@ -181,12 +172,12 @@
+ #if 0
+
+ /* looks up ngram(p,len) */
+-static entry_t *findfreq( table_t *t, char *p, int len )
+-{
+- uint4 hash = simplehash( p, len ) & TABLEMASK;
++static entry_t *findfreq( table_t *t, char *p, int len )
++{
++ uint4 hash = simplehash( p, len ) & TABLEMASK;
+ entry_t *entry = t->table[ hash ];
+-
+- while ( entry ) {
++
++ while ( entry ) {
+ if ( issame( entry->str, p, len ) ) {
+ return entry;
+ }
+@@ -219,7 +210,7 @@
+ #define GREATER(x,y) ((x).cnt > (y).cnt)
+ #define LESS(x,y) ((x).cnt < (y).cnt)
+
+-inline static void siftup( table_t *t, unsigned int child )
++static void siftup( table_t *t, unsigned int child )
+ {
+ entry_t *heap = t->heap;
+ unsigned int parent = (child-1) >> 1;
+@@ -241,7 +232,7 @@
+ }
+
+
+-inline static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
++static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
+ {
+ entry_t *heap = t->heap;
+ unsigned int child = parent*2 + 1;
+@@ -273,7 +264,7 @@
+ if (t->size < t->heapsize) {
+ memcpy( &(heap[t->size]), item, sizeof(entry_t));
+ siftup( t, t->size );
+- t->size++;
++ t->size++;
+ return 0;
+ }
+
+@@ -316,18 +307,18 @@
+
+ /*** Fill result heap ***/
+ for (i=0; i<TABLESIZE; i++) {
+- entry_t *p = t->table[i];
++ entry_t *p = t->table[i];
+ while (p) {
+ heapinsert(t, p);
+ p = p->next;
+ }
+- }
++ }
+ return 1;
+ }
+
+
+ static table_t *inittable(uint4 maxngrams)
+-{
++{
+ table_t *result = (table_t *)wg_zalloc( sizeof(table_t) );
+ result->table = (entry_t **)wg_zalloc( sizeof(entry_t*) * TABLESIZE );
+ result->pool = wgmempool_Init( 10000, 10 );
+@@ -347,14 +338,14 @@
+ wgmempool_Done(t->pool);
+ wg_free(t->table);
+ wg_free(t->heap);
+- wg_free(t);
++ wg_free(t);
+ }
+
+
+ extern void *fp_Init(const char *name)
+ {
+ fp_t *h = (fp_t *)wg_zalloc( sizeof(fp_t) );
+-
++
+ if ( name ) {
+ h->name = wg_strdup(name);
+ }
+@@ -458,21 +449,27 @@
+ return dest;
+ }
+
+-
++/**
++* this function extracts all n-grams from the passed buffer and puts them into the table "t"
++* [modified] by Jocelyn Merand to accept utf-8 multi-character symbols to be used in OpenOffice
++*/
+ static void createngramtable( table_t *t, const char *buf )
+ {
+ char n[MAXNGRAMSIZE+1];
+ const char *p = buf;
+ int i;
++ int pointer = 0;
+
+ /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
+- for (;;p++) {
++ while(1) {
+
+- const char *q = p;
++ const char *q = &p[pointer]; /*[modified] previously p was incremented in the loop header (for(;;p++)); now it is pointer which is increased, so we have to recompute the position in the buffer*/
+ char *m = n;
+
+ /*** First char may be an underscore ***/
+- *m++ = *q++;
++ int decay = charcopy(q, m); /*[modified] previously *m++ = *q++*/
++ q = &(p[pointer+decay]); /*[modified] the old copying method did not handle multi-character symbols*/
++ m += decay; /*[modified]*/
+ *m = '\0';
+
+ increasefreq( t, n, 1 );
+@@ -482,19 +479,22 @@
+ }
+
+ /*** Let the compiler unroll this ***/
+- for ( i=2; i<=MAXNGRAMSIZE; i++) {
++ for ( i=2; i<=MAXNGRAMSYMBOL; i++) {
+
+- *m++ = *q;
++ decay = charcopy(q, m); /*[modified] like above*/
++ m += decay;
+ *m = '\0';
+
+ increasefreq( t, n, i );
+
+ if ( *q == '_' ) break;
+- q++;
++ q += decay;
+ if ( *q == '\0' ) {
+ return;
+ }
+ }
++
++ pointer = nextcharstart(p,pointer); /*[modified] p[pointer] must point to the start of the next symbol, but with utf-8 the next start is not necessarily the next char*/
+ }
+ return;
+ }
+@@ -514,7 +514,7 @@
+ {
+ ngram_t *x = (ngram_t *)a;
+ ngram_t *y = (ngram_t *)b;
+-
++
+ return mystrcmp( x->str, y->str );
+ }
+
+@@ -522,12 +522,12 @@
+ {
+ ngram_t *x = (ngram_t *)a;
+ ngram_t *y = (ngram_t *)b;
+-
++
+ return x->rank - y->rank;
+ }
+
+ /**
+- * Create a fingerprint:
++ * Create a fingerprint:
+ * - record the frequency of each unique n-gram in a hash table
+ * - take the most frequent n-grams
+ * - sort them alphabetically, recording their relative rank
+@@ -544,20 +544,21 @@
+ }
+
+ /*** Throw out all invalid chars ***/
+- tmp = prepbuffer( buffer, bufsize );
++ tmp = prepbuffer( buffer, bufsize );
++ /*printf("Cleaned buffer : %s\n",tmp);*/
+ if ( tmp == NULL ) {
+ return 0;
+ }
+-
+ h = (fp_t*)handle;
+ t = inittable(maxngrams);
++ /*printf("Table initialized\n");*/
+
+ /*** Create a hash table containing n-gram counts ***/
+ createngramtable(t, tmp);
+-
++ /*printf("Table created\n");*/
+ /*** Take the top N n-grams and add them to the profile ***/
+- table2heap(t);
+- maxngrams = WGMIN( maxngrams, t->size );
++ table2heap(t);
++ maxngrams = WGMIN( maxngrams, t->size );
+
+ h->fprint = (ngram_t *)wg_malloc( sizeof(ngram_t) * maxngrams );
+ h->size = maxngrams;
+@@ -568,7 +569,7 @@
+ entry_t tmp2;
+
+ heapextract(t, &tmp2);
+-
++
+ /*** the string and its rank is all we need ***/
+ strcpy( h->fprint[i].str, tmp2.str );
+ h->fprint[i].rank = i;
+@@ -578,7 +579,7 @@
+ wg_free(tmp);
+
+ /*** Sort n-grams alphabetically, for easy comparison ***/
+- qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
++ qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
+ return 1;
+ }
+
+@@ -608,7 +609,7 @@
+ #endif
+ return 0;
+ }
+-
++
+ h->fprint = (ngram_t *)wg_malloc(maxngrams * sizeof(ngram_t));
+
+ while (cnt < maxngrams && wg_getline(line,1024,fp)) {
+@@ -635,7 +636,7 @@
+ h->size = cnt;
+
+ /*** Sort n-grams, for easy comparison later on ***/
+- qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
++ qsort( h->fprint, h->size, sizeof(ngram_t), ngramcmp_str );
+
+ fclose(fp);
+
+@@ -648,14 +649,15 @@
+ {
+ uint4 i;
+ fp_t *h = (fp_t *)handle;
+- ngram_t *tmp = wg_malloc( sizeof(ngram_t) * h->size );
+-
++ ngram_t *tmp = (ngram_t*)wg_malloc( sizeof(ngram_t) * h->size );
++
+ /*** Make a temporary and sort it on rank ***/
+ memcpy( tmp, h->fprint, h->size * sizeof(ngram_t) );
+- qsort( tmp, h->size, sizeof(ngram_t), ngramcmp_rank );
++ qsort( tmp, h->size, sizeof(ngram_t), ngramcmp_rank );
+
+ for (i=0; i<h->size; i++) {
+- fprintf( fp, "%s\n", tmp[i].str );
++ /*fprintf( fp, "%s\t%i\n", tmp[i].str, tmp[i].rank );*/
++ fprintf( fp, "%s\n", tmp[i].str);
+ }
+ wg_free( tmp );
+ }
+@@ -669,7 +671,7 @@
+ uint4 i = 0;
+ uint4 j = 0;
+ sint4 sum = 0;
+-
++
+ /*** Compare the profiles in mergesort fashion ***/
+ while ( i < c->size && j < u->size ) {
+
+@@ -705,7 +707,7 @@
+ }
+
+ return sum;
+-
++
+ }
+
+
+--- misc/libtextcat-2.2/src/fingerprint.h Mon May 19 14:16:31 2003
++++ misc/build/libtextcat-2.2/src/fingerprint.h Mon Mar 31 11:29:14 2008
+@@ -41,7 +41,13 @@
+ extern int fp_Read( void *handle, const char *fname, int maxngrams );
+ extern sint4 fp_Compare( void *cat, void *unknown, int cutoff );
+ extern void fp_Show( void *handle );
++#ifdef __cplusplus
++extern "C" {
++#endif
+ extern const char *fp_Name( void *handle );
++#ifdef __cplusplus
++}
++#endif
+ extern void fp_Print( void *handle, FILE *fp );
+
+ #endif
+--- misc/libtextcat-2.2/src/libtextcat.map Mon Mar 31 11:30:06 2008
++++ misc/build/libtextcat-2.2/src/libtextcat.map Mon Mar 31 11:29:14 2008
+@@ -1 +1,40 @@
+-dummy
++{
++ global:
++ charcopy
++ issame
++ nextcharstart
++ utfstrlen
++ wgmempool_Done
++ wgmempool_Init
++ wgmempool_Reset
++ wgmempool_alloc
++ wgmempool_getline
++ wgmempool_strdup
++ special_textcat_Init
++ textcat_Classify
++ textcat_Done
++ textcat_Init
++ textcat_Version
++ fp_Compare
++ fp_Create
++ fp_Debug
++ fp_Done
++ fp_Init
++ fp_Name
++ fp_Print
++ fp_Read
++ heapextract
++ wg_calloc
++ wg_free
++ wg_getline
++ wg_malloc
++ wg_split
++ wg_strdup
++ wg_strgmov
++ wg_trim
++ wg_zalloc
++ wgmem_error
++
++ local:
++ *;
++}
+--- misc/libtextcat-2.2/src/makefile.mk Mon Mar 31 11:30:06 2008
++++ misc/build/libtextcat-2.2/src/makefile.mk Mon Mar 31 11:29:42 2008
+@@ -1 +1,87 @@
+-dummy
++#*************************************************************************
++#
++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++#
++# Copyright 2000, 2010 Oracle and/or its affiliates.
++#
++# OpenOffice.org - a multi-platform office productivity suite
++#
++# This file is part of OpenOffice.org.
++#
++# OpenOffice.org is free software: you can redistribute it and/or modify
++# it under the terms of the GNU Lesser General Public License version 3
++# only, as published by the Free Software Foundation.
++#
++# OpenOffice.org is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU Lesser General Public License version 3 for more details
++# (a copy is included in the LICENSE file that accompanied this code).
++#
++# You should have received a copy of the GNU Lesser General Public License
++# version 3 along with OpenOffice.org. If not, see
++# <http://www.openoffice.org/license.html>
++# for a copy of the LGPLv3 License.
++#
++#*************************************************************************
++
++PRJ = ..$/..$/..$/..$/..
++
++PRJNAME = libtextcat
++TARGET = libtextcat
++CFLAGSCALL=gsd
++
++USE_DEFFILE=TRUE
++EXTERNAL_WARNINGS_NOT_ERRORS := TRUE
++UWINAPILIB=
++
++.INCLUDE : settings.mk
++
++# --- Files --------------------------------------------------------
++
++# !! not to be compiled because these belong to stand-alone programs: !!
++# $(SLO)$/createfp.obj\
++# $(SLO)$/testtextcat.obj
++
++SLOFILES= \
++ $(SLO)$/common.obj\
++ $(SLO)$/fingerprint.obj\
++ $(SLO)$/textcat.obj\
++ $(SLO)$/wg_mempool.obj\
++ $(SLO)$/utf8misc.obj
++
++#SHL1TARGET= $(TARGET)$(UPD)$(DLLPOSTFIX)
++SHL1TARGET= $(TARGET)
++
++SHL1STDLIBS=
++
++# build DLL
++SHL1LIBS= $(SLB)$/$(TARGET).lib
++SHL1IMPLIB= i$(TARGET)
++SHL1DEPN= $(SHL1LIBS)
++SHL1DEF= $(MISC)$/$(SHL1TARGET).def
++
++# build DEF file
++DEF1NAME= $(SHL1TARGET)
++DEF1DEPN=$(MISC)$/$(SHL1TARGET).flt
++
++SHL1VERSIONMAP= libtextcat.map
++
++# --- Targets ------------------------------------------------------
++
++.INCLUDE : target.mk
++
++# copy hand supplied configuration file for Win32 builds to the file
++# which is included in the source code
++$(SLOFILES) : config.h
++config.h :
++ $(GNUCOPY) $(OUT)$/misc$/build$/libtextcat-2.2$/src$/win32_config.h $(OUT)$/misc$/build$/libtextcat-2.2$/src$/config.h
++
++
++$(MISC)$/$(SHL1TARGET).flt: makefile.mk
++ @echo ------------------------------
++ @echo Making: $@
++ @echo Imp>$@
++ @echo __CT>>$@
++ @echo _real>>$@
++ @echo unnamed>>$@
+--- misc/libtextcat-2.2/src/textcat.c Thu May 22 13:32:43 2003
++++ misc/build/libtextcat-2.2/src/textcat.c Mon Mar 31 11:29:14 2008
+@@ -4,23 +4,23 @@
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+- *
++ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+- *
++ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+- *
++ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+- *
++ *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+- *
++ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@@ -74,6 +74,7 @@
+ typedef struct {
+
+ void **fprint;
++ char *fprint_disable;
+ uint4 size;
+ uint4 maxsize;
+
+@@ -112,11 +113,21 @@
+ fp_Done( h->fprint[i] );
+ }
+ wg_free( h->fprint );
++ wg_free( h->fprint_disable );
+ wg_free( h );
+
+ }
+
+-extern void *textcat_Init( const char *conffile )
++/** Replaces older function */
++extern void *textcat_Init( const char *conffile ){
++ return special_textcat_Init( conffile, DEFAULT_FINGERPRINTS_PATH );
++}
++
++/**
++ * Originally this function had only one parameter (conffile); it has been modified for use in OOo.
++ * Basically, prefix is the directory path where the fingerprints are stored.
++ */
++extern void *special_textcat_Init( const char *conffile, const char *prefix )
+ {
+ textcat_t *h;
+ char line[1024];
+@@ -134,11 +145,13 @@
+ h->size = 0;
+ h->maxsize = 16;
+ h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize );
++ h->fprint_disable = (char *)wg_malloc( sizeof(char*) * h->maxsize ); /*added to store the state of languages*/
+
+ while ( wg_getline( line, 1024, fp ) ) {
+ char *p;
+ char *segment[4];
+- int res;
++ char finger_print_file_name[512];
++ int res;
+
+ /*** Skip comments ***/
+ #ifdef HAVE_STRCHR
+@@ -156,17 +169,23 @@
+ /*** Ensure enough space ***/
+ if ( h->size == h->maxsize ) {
+ h->maxsize *= 2;
+- h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
++ h->fprint = (void **)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
++ h->fprint_disable = (char *)wg_realloc( h->fprint_disable, sizeof(char*) * h->maxsize );
+ }
+
+ /*** Load data ***/
+ if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) {
+ goto ERROR;
+ }
+- if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) {
++ finger_print_file_name[0] = '\0';
++ strcat(finger_print_file_name, prefix);
++ strcat(finger_print_file_name, segment[0]);
++
++ if ( fp_Read( h->fprint[h->size], finger_print_file_name, 400 ) == 0 ) {
+ textcat_Done(h);
+ goto ERROR;
+- }
++ }
++ h->fprint_disable[h->size] = 0xF0; /*0xF0 is the code for enabled languages, 0x0F is for disabled*/
+ h->size++;
+ }
+
+@@ -203,11 +222,18 @@
+ result = _TEXTCAT_RESULT_SHORT;
+ goto READY;
+ }
+-
++
+ /*** Calculate the score for each category. ***/
+ for (i=0; i<h->size; i++) {
+- int score = fp_Compare( h->fprint[i], unknown, threshold );
+- candidates[i].score = score;
++ int score;
++ if(h->fprint_disable[i] & 0x0F){ /*if this language is disabled*/
++ score = MAXSCORE;
++ }
++ else{
++ score = fp_Compare( h->fprint[i], unknown, threshold );
++ /*printf("Score for %s : %i\n", fp_Name(h->fprint[i]), score);*/
++ }
++ candidates[i].score = score;
+ candidates[i].name = fp_Name( h->fprint[i] );
+ if ( score < minscore ) {
+ minscore = score;
+@@ -218,7 +244,6 @@
+ /*** Find the best performers ***/
+ for (i=0; i<h->size; i++) {
+ if ( candidates[i].score < threshold ) {
+-
+ if ( ++cnt == MAXCANDIDATES+1 ) {
+ break;
+ }
+@@ -235,7 +260,7 @@
+ else {
+ char *p = result;
+ char *plimit = result+MAXOUTPUTSIZE;
+-
++
+ qsort( candidates, cnt, sizeof(candidate_t), cmpcandidates );
+
+ *p = '\0';
+@@ -247,7 +272,7 @@
+ }
+ READY:
+ fp_Done(unknown);
+-#ifdef SHOULD_FREE
++#ifdef SHOULD_FREE
+ free(candidates);
+ #undef SHOULD_FREE
+ #endif
+--- misc/libtextcat-2.2/src/textcat.h Mon May 19 14:16:31 2003
++++ misc/build/libtextcat-2.2/src/textcat.h Mon Mar 31 11:29:14 2008
+@@ -40,6 +40,9 @@
+ #define _TEXTCAT_RESULT_UNKOWN "UNKNOWN"
+ #define _TEXTCAT_RESULT_SHORT "SHORT"
+
++#ifdef __cplusplus
++extern "C" {
++#endif
+
+ /**
+ * textcat_Init() - Initialize the text classifier. The textfile
+@@ -51,10 +54,19 @@
+ * Returns: handle on success, NULL on error. (At the moment, the
+ * only way errors can occur, is when the library cannot read the
+ * conffile, or one of the fingerprint files listed in it.)
++ *
++ * Replaces the older function (and has exactly the same behaviour);
++ * see special_textcat_Init() below.
+ */
+ extern void *textcat_Init( const char *conffile );
+
+ /**
++ * Originally this function had only one parameter (conffile); it has been modified since OOo must be able to load an alternative DB.
++ * Basically, prefix is the directory path where fingerprints are stored.
++ */
++extern void *special_textcat_Init( const char *conffile, const char *prefix );
++
++/**
+ * textcat_Done() - Free up resources for handle
+ */
+ extern void textcat_Done( void *handle );
+@@ -77,4 +89,8 @@
+ * textcat_Version() - Returns a string describing the version of this classifier.
+ */
+ extern char *textcat_Version();
++
++#ifdef __cplusplus
++}
++#endif
+ #endif
+--- misc/libtextcat-2.2/src/utf8misc.c Mon Mar 31 11:30:06 2008
++++ misc/build/libtextcat-2.2/src/utf8misc.c Mon Mar 31 11:29:14 2008
+@@ -1 +1,132 @@
+-dummy
++/***************************************************************************
++ * Copyright (C) 2006 by Jocelyn Merand *
++ * joc.mer@gmail.com *
++ * *
++ * THE BSD LICENSE
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the
++ * distribution.
++ *
++ * - Neither the name of the WiseGuys Internet B.V. nor the names of
++ * its contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ ***************************************************************************/
++
++#ifndef _UTF8_MISC_H_
++#include "utf8misc.h"
++#endif
++
++/* Returns the byte index just past the character that starts at str[position]; never advances beyond the terminating \0. */
++int nextcharstart(const char *str, int position){
++ int pointer = position;
++
++ if(str[pointer] & ESCAPE_MASK){ /*with _UTF8_ defined ESCAPE_MASK is 0x80, so this tests the top bit of the current byte*/
++
++ /*then str[pointer] is the lead ("escape") byte of a multi-byte character*/
++
++ char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*keep only the masked high bits and shift the top bit out; each leading 1 bit that remains stands for one following byte*/
++
++ while(escape_char & ESCAPE_MASK && str[pointer]){/*each step skips one byte and shifts left by 1 bit; stop when the top bit is 0 or str ends*/
++ escape_char = escape_char <<1;
++ ++pointer;
++ }
++ }
++ if(str[pointer]){ /*finally, if we are not on the \0 character, step over the last byte of the character*/
++ ++pointer;
++ }
++ return pointer;
++}
++
++/* Copies the bytes of the character at the start of str into dest (no \0 is appended) and returns the number of bytes copied. */
++int charcopy(const char *str, char *dest){
++
++ int pointer = 0;
++ if(str[pointer] & ESCAPE_MASK){ /*with _UTF8_ defined ESCAPE_MASK is 0x80, so this tests the top bit of the first byte*/
++
++ /*then str[pointer] is the lead ("escape") byte of a multi-byte character*/
++
++ char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*keep only the masked high bits and shift the top bit out; each leading 1 bit that remains stands for one following byte*/
++
++ while(escape_char & ESCAPE_MASK && str[pointer]){ /*each step copies one byte and shifts left by 1 bit; stop when the top bit is 0 or str ends*/
++ dest[pointer] = str[pointer];
++ escape_char = escape_char <<1;
++ ++pointer;
++ }
++ }
++ if(str[pointer]){ /*copy the last (or only) byte of the character unless str has ended*/
++ dest[pointer] = str[pointer];
++ ++pointer;
++ }
++
++ return pointer;
++}
++
++/* Returns 1 when lex matches the first len characters of key byte for byte and lex ends right after them, 0 otherwise. */
++int issame( char *lex, char *key, int len )
++{
++ /*printf("[%s] prefix of [%s] with length %i", lex, key, len);*/
++ int char_counter = 0;
++ int pointer = 0;
++ while(char_counter < len) {
++
++ if(key[pointer] & ESCAPE_MASK){ /*with _UTF8_ defined ESCAPE_MASK is 0x80, so this tests the top bit of the current byte*/
++
++ /*then key[pointer] is the lead ("escape") byte of a multi-byte character*/
++
++ char escape_char = ((key[pointer] & WEIGHT_MASK) << 1); /*keep only the masked high bits and shift the top bit out; the remaining leading 1 bits are used to count bytes*/
++
++ while(escape_char & ESCAPE_MASK && key[pointer] == lex[pointer] ){
++ escape_char = escape_char <<1;
++ ++pointer;
++ }
++ }
++ ++char_counter; /*one more character of key has been walked through*/
++ if ( key[pointer] != lex[pointer] ) {
++ return 0;
++ /*printf(" NO\n", lex, key, len);*/
++ }
++ ++pointer;
++ }
++ if ( lex[pointer] != '\0' ) {
++ return 0;
++ /*printf(" NO\n");*/
++ }
++
++ /*printf(" YES\n");*/
++
++ return 1;
++}
++
++/* Returns the number of characters in str, advancing with nextcharstart() until the terminating \0 is reached. */
++extern int utfstrlen(const char* str){
++ int char_counter = 0;
++ int pointer = 0;
++ while(str[pointer]) {
++ pointer = nextcharstart(str, pointer);
++
++ ++char_counter; /*one more character has been stepped over*/
++ }
++ return char_counter;
++}
++
+--- misc/libtextcat-2.2/src/utf8misc.h Mon Mar 31 11:30:06 2008
++++ misc/build/libtextcat-2.2/src/utf8misc.h Mon Mar 31 11:29:14 2008
+@@ -1 +1,88 @@
+-dummy
++/***************************************************************************
++ * Copyright (C) 2006 by Jocelyn Merand *
++ * joc.mer@gmail.com *
++ * *
++ * THE BSD LICENSE
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the
++ * distribution.
++ *
++ * - Neither the name of the WiseGuys Internet B.V. nor the names of
++ * its contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ ***************************************************************************/
++
++#ifndef _UTF8_MISC_H_
++#define _UTF8_MISC_H_
++
++/**
++ * These macros are used by the character processing functions to handle UTF-8 symbols, in particular their lead ("escape") bytes.
++ * With _UTF8_ defined, ESCAPE_MASK tests the top bit and WEIGHT_MASK keeps the high nibble; otherwise the masks make every byte count as one character.
++ */
++#ifdef _UTF8_
++#define ESCAPE_MASK 0x80
++#define WEIGHT_MASK 0xF0
++#else
++#define ESCAPE_MASK 0xFF
++#define WEIGHT_MASK 0x00
++#endif
++
++
++/*
++ * Jumps to the start of the next character and returns its byte index.
++ * Of course, it is only useful when the encoding is UTF-8.
++ * This function has been added by Jocelyn Merand to use libtextcat in OOo.
++ */
++int nextcharstart(const char *str, int position);
++
++
++/* Copies the character at the start of str to dest (dest is not \0-terminated).
++ * Of course, it is only useful when the encoding is UTF-8 and the symbol is encoded with more than 1 byte.
++ * Returns the number of bytes copied.
++ * This function has been added by Jocelyn Merand to use libtextcat in OOo.
++ */
++int charcopy(const char *str, char *dest);
++
++
++/* Checks whether n-gram lex is a prefix of key and of length len.
++* If _UTF8_ is defined, it uses escape characters and len is not really the byte length of lex;
++* in this case, len is the number of UTF-8 characters: strlen("€") == 3 but len == 1.
++*/
++int issame( char *lex, char *key, int len );
++
++
++/* Counts the number of characters.
++* If _UTF8_ is defined, it uses escape characters and the result is not really the byte length of str;
++* in this case, the result is the number of UTF-8 characters: strlen("€") == 3 but utfstrlen("€") == 1.
++*/
++#ifdef __cplusplus
++extern "C" {
++#endif
++extern int utfstrlen(const char* str);
++#ifdef __cplusplus
++}
++#endif
++
++#endif
++
++
+--- misc/libtextcat-2.2/src/win32_config.h Mon Mar 31 11:30:06 2008
++++ misc/build/libtextcat-2.2/src/win32_config.h Mon Mar 31 11:29:14 2008
+@@ -1 +1,136 @@
+-dummy
++/* src/config.h. Generated by configure. */
++/* src/config.h.in. Generated from configure.ac by autoheader. */
++
++/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
++ systems. This function is required for `alloca.c' support on those systems.
++ */
++/* #undef CRAY_STACKSEG_END */
++
++/* Define to 1 if using `alloca.c'. */
++/* #undef C_ALLOCA */
++
++/* Define to 1 if you have `alloca', as a function or macro. */
++/* #undef HAVE_ALLOCA */
++
++/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
++ */
++/* #undef HAVE_ALLOCA_H */
++
++/* Define to 1 if you have the <dlfcn.h> header file. */
++#define HAVE_DLFCN_H 1
++
++/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */
++/* #undef HAVE_DOPRNT */
++
++/* Define to 1 if you have the `gettimeofday' function. */
++/* #undef HAVE_GETTIMEOFDAY */
++
++/* Define to 1 if you have the <inttypes.h> header file. */
++/* #undef HAVE_INTTYPES_H */
++
++/* Define to 1 if you have the <limits.h> header file. */
++#define HAVE_LIMITS_H 1
++
++/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
++ to 0 otherwise. */
++#define HAVE_MALLOC 1
++
++/* Define to 1 if you have the <memory.h> header file. */
++#define HAVE_MEMORY_H 1
++
++/* Define to 1 if you have the `memset' function. */
++#define HAVE_MEMSET 1
++
++/* Define to 1 if your system has a GNU libc compatible `realloc' function,
++ and to 0 otherwise. */
++#define HAVE_REALLOC 1
++
++/* Define to 1 if you have the <stdint.h> header file. */
++/* #undef HAVE_STDINT_H */
++
++/* Define to 1 if you have the <stdlib.h> header file. */
++#define HAVE_STDLIB_H 1
++
++/* Define to 1 if you have the `strchr' function. */
++#define HAVE_STRCHR 1
++
++/* Define to 1 if you have the `strdup' function. */
++#define HAVE_STRDUP 1
++
++/* Define to 1 if you have the <strings.h> header file. */
++/* #undef HAVE_STRINGS_H */
++
++/* Define to 1 if you have the <string.h> header file. */
++#define HAVE_STRING_H 1
++
++/* Define to 1 if you have the `strpbrk' function. */
++#define HAVE_STRPBRK 1
++
++/* Define to 1 if you have the <sys/stat.h> header file. */
++#define HAVE_SYS_STAT_H 1
++
++/* Define to 1 if you have the <sys/time.h> header file. */
++/* #undef HAVE_SYS_TIME_H */
++
++/* Define to 1 if you have the <sys/types.h> header file. */
++#define HAVE_SYS_TYPES_H 1
++
++/* Define to 1 if you have the <unistd.h> header file. */
++#define HAVE_UNISTD_H 1
++
++/* Define to 1 if you have the `vprintf' function. */
++#define HAVE_VPRINTF 1
++
++/* Name of package */
++#define PACKAGE "libtextcat"
++
++/* Define to the address where bug reports for this package should be sent. */
++#define PACKAGE_BUGREPORT ""
++
++/* Define to the full name of this package. */
++#define PACKAGE_NAME "libtextcat"
++
++/* Define to the full name and version of this package. */
++#define PACKAGE_STRING "libtextcat 2.2"
++
++/* Define to the one symbol short name of this package. */
++#define PACKAGE_TARNAME "libtextcat"
++
++/* Define to the version of this package. */
++#define PACKAGE_VERSION "2.2"
++
++/* If using the C implementation of alloca, define if you know the
++ direction of stack growth for your system; otherwise it will be
++ automatically deduced at run-time.
++ STACK_DIRECTION > 0 => grows toward higher addresses
++ STACK_DIRECTION < 0 => grows toward lower addresses
++ STACK_DIRECTION = 0 => direction of growth unknown */
++/* #undef STACK_DIRECTION */
++
++/* Define to 1 if you have the ANSI C header files. */
++#define STDC_HEADERS 1
++
++/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
++#define TIME_WITH_SYS_TIME 1
++
++/* Define to 1 if your <sys/time.h> declares `struct tm'. */
++/* #undef TM_IN_SYS_TIME */
++
++/* Version number of package */
++#define VERSION "2.2"
++
++/* Define to empty if `const' does not conform to ANSI C. */
++/* #undef const */
++
++/* Define as `__inline' if that's what the C compiler calls it, or to nothing
++ if it is not supported. */
++/* #undef inline */
++
++/* Define to rpl_malloc if the replacement function should be used. */
++/* #undef malloc */
++
++/* Define to rpl_realloc if the replacement function should be used. */
++/* #undef realloc */
++
++/* Define to `unsigned' if <sys/types.h> does not define. */
++/* #undef size_t */
+--- misc/libtextcat-2.2/config.guess 2010-04-15 09:20:04.000000000 +0000
++++ misc/build/libtextcat-2.2/config.guess 2010-04-15 09:20:41.000000000 +0000
+@@ -1,9 +1,10 @@
+ #! /bin/sh
+ # Attempt to guess a canonical system name.
+ # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+-# 2000, 2001, 2002 Free Software Foundation, Inc.
++# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
++# Free Software Foundation, Inc.
+
+-timestamp='2002-10-21'
++timestamp='2009-12-30'
+
+ # This file is free software; you can redistribute it and/or modify it
+ # under the terms of the GNU General Public License as published by
+@@ -17,23 +18,25 @@
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
++# 02110-1301, USA.
+ #
+ # As a special exception to the GNU General Public License, if you
+ # distribute this file as part of a program that contains a
+ # configuration script generated by Autoconf, you may include it under
+ # the same distribution terms that you use for the rest of that program.
+
+-# Originally written by Per Bothner <per@bothner.com>.
+-# Please send patches to <config-patches@gnu.org>. Submit a context
+-# diff and a properly formatted ChangeLog entry.
++
++# Originally written by Per Bothner. Please send patches (context
++# diff format) to <config-patches@gnu.org> and include a ChangeLog
++# entry.
+ #
+ # This script attempts to guess a canonical system name similar to
+ # config.sub. If it succeeds, it prints the system name on stdout, and
+ # exits with 0. Otherwise, it exits with 1.
+ #
+-# The plan is that this can be called by configure scripts if you
+-# don't specify an explicit build system type.
++# You can get the latest version of this script from:
++# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+
+ me=`echo "$0" | sed -e 's,.*/,,'`
+
+@@ -53,8 +56,9 @@
+ GNU config.guess ($timestamp)
+
+ Originally written by Per Bothner.
+-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+-Free Software Foundation, Inc.
++Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
++2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
++Software Foundation, Inc.
+
+ This is free software; see the source for copying conditions. There is NO
+ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+@@ -66,11 +70,11 @@
+ while test $# -gt 0 ; do
+ case $1 in
+ --time-stamp | --time* | -t )
+- echo "$timestamp" ; exit 0 ;;
++ echo "$timestamp" ; exit ;;
+ --version | -v )
+- echo "$version" ; exit 0 ;;
++ echo "$version" ; exit ;;
+ --help | --h* | -h )
+- echo "$usage"; exit 0 ;;
++ echo "$usage"; exit ;;
+ -- ) # Stop option processing
+ shift; break ;;
+ - ) # Use stdin as input.
+@@ -98,14 +102,18 @@
+ # Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+ # use `HOST_CC' if defined, but it is deprecated.
+
+-# This shell variable is my proudest work .. or something. --bje
++# Portable tmp directory creation inspired by the Autoconf team.
+
+-set_cc_for_build='tmpdir=${TMPDIR-/tmp}/config-guess-$$ ;
+-(old=`umask` && umask 077 && mkdir $tmpdir && umask $old && unset old)
+- || (echo "$me: cannot create $tmpdir" >&2 && exit 1) ;
+-dummy=$tmpdir/dummy ;
+-files="$dummy.c $dummy.o $dummy.rel $dummy" ;
+-trap '"'"'rm -f $files; rmdir $tmpdir; exit 1'"'"' 1 2 15 ;
++set_cc_for_build='
++trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
++trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
++: ${TMPDIR=/tmp} ;
++ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
++ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
++ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
++ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
++dummy=$tmp/dummy ;
++tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+ case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,) echo "int x;" > $dummy.c ;
+ for c in cc gcc c89 c99 ; do
+@@ -113,15 +121,13 @@
+ CC_FOR_BUILD="$c"; break ;
+ fi ;
+ done ;
+- rm -f $files ;
+ if test x"$CC_FOR_BUILD" = x ; then
+ CC_FOR_BUILD=no_compiler_found ;
+ fi
+ ;;
+ ,,*) CC_FOR_BUILD=$CC ;;
+ ,*,*) CC_FOR_BUILD=$HOST_CC ;;
+-esac ;
+-unset files'
++esac ; set_cc_for_build= ;'
+
+ # This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+ # (ghazi@noc.rutgers.edu 1994-08-24)
+@@ -156,6 +162,7 @@
+ arm*) machine=arm-unknown ;;
+ sh3el) machine=shl-unknown ;;
+ sh3eb) machine=sh-unknown ;;
++ sh5el) machine=sh5le-unknown ;;
+ *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+ esac
+ # The Operating System including object format, if it has switched
+@@ -164,7 +171,7 @@
+ arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+ eval $set_cc_for_build
+ if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+- | grep __ELF__ >/dev/null
++ | grep -q __ELF__
+ then
+ # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+ # Return netbsd for either. FIX?
+@@ -178,144 +185,128 @@
+ ;;
+ esac
+ # The OS release
+- release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
++ # Debian GNU/NetBSD machines have a different userland, and
++ # thus, need a distinct triplet. However, they do not need
++ # kernel version information, so it can be replaced with a
++ # suitable tag, in the style of linux-gnu.
++ case "${UNAME_VERSION}" in
++ Debian*)
++ release='-gnu'
++ ;;
++ *)
++ release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
++ ;;
++ esac
+ # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+ # contains redundant information, the shorter form:
+ # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+ echo "${machine}-${os}${release}"
+- exit 0 ;;
+- amiga:OpenBSD:*:*)
+- echo m68k-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- arc:OpenBSD:*:*)
+- echo mipsel-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- hp300:OpenBSD:*:*)
+- echo m68k-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- mac68k:OpenBSD:*:*)
+- echo m68k-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- macppc:OpenBSD:*:*)
+- echo powerpc-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- mvme68k:OpenBSD:*:*)
+- echo m68k-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- mvme88k:OpenBSD:*:*)
+- echo m88k-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- mvmeppc:OpenBSD:*:*)
+- echo powerpc-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- pmax:OpenBSD:*:*)
+- echo mipsel-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- sgi:OpenBSD:*:*)
+- echo mipseb-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- sun3:OpenBSD:*:*)
+- echo m68k-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
+- wgrisc:OpenBSD:*:*)
+- echo mipsel-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:OpenBSD:*:*)
+- echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE}
+- exit 0 ;;
++ UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
++ echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
++ exit ;;
++ *:ekkoBSD:*:*)
++ echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
++ exit ;;
++ *:SolidBSD:*:*)
++ echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
++ exit ;;
++ macppc:MirBSD:*:*)
++ echo powerpc-unknown-mirbsd${UNAME_RELEASE}
++ exit ;;
++ *:MirBSD:*:*)
++ echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
++ exit ;;
+ alpha:OSF1:*:*)
+- if test $UNAME_RELEASE = "V4.0"; then
++ case $UNAME_RELEASE in
++ *4.0)
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+- fi
++ ;;
++ *5.*)
++ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
++ ;;
++ esac
++ # According to Compaq, /usr/sbin/psrinfo has been available on
++ # OSF/1 and Tru64 systems produced since 1995. I hope that
++ # covers most systems running today. This code pipes the CPU
++ # types through head -n 1, so we only detect the type of CPU 0.
++ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
++ case "$ALPHA_CPU_TYPE" in
++ "EV4 (21064)")
++ UNAME_MACHINE="alpha" ;;
++ "EV4.5 (21064)")
++ UNAME_MACHINE="alpha" ;;
++ "LCA4 (21066/21068)")
++ UNAME_MACHINE="alpha" ;;
++ "EV5 (21164)")
++ UNAME_MACHINE="alphaev5" ;;
++ "EV5.6 (21164A)")
++ UNAME_MACHINE="alphaev56" ;;
++ "EV5.6 (21164PC)")
++ UNAME_MACHINE="alphapca56" ;;
++ "EV5.7 (21164PC)")
++ UNAME_MACHINE="alphapca57" ;;
++ "EV6 (21264)")
++ UNAME_MACHINE="alphaev6" ;;
++ "EV6.7 (21264A)")
++ UNAME_MACHINE="alphaev67" ;;
++ "EV6.8CB (21264C)")
++ UNAME_MACHINE="alphaev68" ;;
++ "EV6.8AL (21264B)")
++ UNAME_MACHINE="alphaev68" ;;
++ "EV6.8CX (21264D)")
++ UNAME_MACHINE="alphaev68" ;;
++ "EV6.9A (21264/EV69A)")
++ UNAME_MACHINE="alphaev69" ;;
++ "EV7 (21364)")
++ UNAME_MACHINE="alphaev7" ;;
++ "EV7.9 (21364A)")
++ UNAME_MACHINE="alphaev79" ;;
++ esac
++ # A Pn.n version is a patched version.
+ # A Vn.n version is a released version.
+ # A Tn.n version is a released field test version.
+ # A Xn.n version is an unreleased experimental baselevel.
+ # 1.2 uses "1.2" for uname -r.
+- eval $set_cc_for_build
+- cat <<EOF >$dummy.s
+- .data
+-\$Lformat:
+- .byte 37,100,45,37,120,10,0 # "%d-%x\n"
+-
+- .text
+- .globl main
+- .align 4
+- .ent main
+-main:
+- .frame \$30,16,\$26,0
+- ldgp \$29,0(\$27)
+- .prologue 1
+- .long 0x47e03d80 # implver \$0
+- lda \$2,-1
+- .long 0x47e20c21 # amask \$2,\$1
+- lda \$16,\$Lformat
+- mov \$0,\$17
+- not \$1,\$18
+- jsr \$26,printf
+- ldgp \$29,0(\$26)
+- mov 0,\$16
+- jsr \$26,exit
+- .end main
+-EOF
+- $CC_FOR_BUILD -o $dummy $dummy.s 2>/dev/null
+- if test "$?" = 0 ; then
+- case `$dummy` in
+- 0-0)
+- UNAME_MACHINE="alpha"
+- ;;
+- 1-0)
+- UNAME_MACHINE="alphaev5"
+- ;;
+- 1-1)
+- UNAME_MACHINE="alphaev56"
+- ;;
+- 1-101)
+- UNAME_MACHINE="alphapca56"
+- ;;
+- 2-303)
+- UNAME_MACHINE="alphaev6"
+- ;;
+- 2-307)
+- UNAME_MACHINE="alphaev67"
+- ;;
+- 2-1307)
+- UNAME_MACHINE="alphaev68"
+- ;;
+- 3-1307)
+- UNAME_MACHINE="alphaev7"
+- ;;
+- esac
+- fi
+- rm -f $dummy.s $dummy && rmdir $tmpdir
+- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+- exit 0 ;;
++ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
++ exit ;;
+ Alpha\ *:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # Should we change UNAME_MACHINE based on the output of uname instead
+ # of the specific Alpha model?
+ echo alpha-pc-interix
+- exit 0 ;;
++ exit ;;
+ 21064:Windows_NT:50:3)
+ echo alpha-dec-winnt3.5
+- exit 0 ;;
++ exit ;;
+ Amiga*:UNIX_System_V:4.0:*)
+ echo m68k-unknown-sysv4
+- exit 0;;
++ exit ;;
+ *:[Aa]miga[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-unknown-amigaos
+- exit 0 ;;
++ exit ;;
+ *:[Mm]orph[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-unknown-morphos
+- exit 0 ;;
++ exit ;;
+ *:OS/390:*:*)
+ echo i370-ibm-openedition
+- exit 0 ;;
++ exit ;;
++ *:z/VM:*:*)
++ echo s390-ibm-zvmoe
++ exit ;;
++ *:OS400:*:*)
++ echo powerpc-ibm-os400
++ exit ;;
+ arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+ echo arm-acorn-riscix${UNAME_RELEASE}
+- exit 0;;
++ exit ;;
++ arm:riscos:*:*|arm:RISCOS:*:*)
++ echo arm-unknown-riscos
++ exit ;;
+ SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+ echo hppa1.1-hitachi-hiuxmpp
+- exit 0;;
++ exit ;;
+ Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+ # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then
+@@ -323,29 +314,51 @@
+ else
+ echo pyramid-pyramid-bsd
+ fi
+- exit 0 ;;
++ exit ;;
+ NILE*:*:*:dcosx)
+ echo pyramid-pyramid-svr4
+- exit 0 ;;
+- DRS?6000:UNIX_SV:4.2*:7*)
++ exit ;;
++ DRS?6000:unix:4.0:6*)
++ echo sparc-icl-nx6
++ exit ;;
++ DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
+ case `/usr/bin/uname -p` in
+- sparc) echo sparc-icl-nx7 && exit 0 ;;
++ sparc) echo sparc-icl-nx7; exit ;;
+ esac ;;
++ s390x:SunOS:*:*)
++ echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
++ exit ;;
+ sun4H:SunOS:5.*:*)
+ echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+- exit 0 ;;
++ exit ;;
+ sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+ echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+- exit 0 ;;
+- i86pc:SunOS:5.*:*)
+- echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+- exit 0 ;;
++ exit ;;
++ i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
++ echo i386-pc-auroraux${UNAME_RELEASE}
++ exit ;;
++ i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
++ eval $set_cc_for_build
++ SUN_ARCH="i386"
++ # If there is a compiler, see if it is configured for 64-bit objects.
++ # Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
++ # This test works for both compilers.
++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
++ if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
++ grep IS_64BIT_ARCH >/dev/null
++ then
++ SUN_ARCH="x86_64"
++ fi
++ fi
++ echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
++ exit ;;
+ sun4*:SunOS:6*:*)
+ # According to config.sub, this is the proper way to canonicalize
+ # SunOS6. Hard to guess exactly what SunOS6 will be like, but
+ # it's likely to be more like Solaris than SunOS4.
+ echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+- exit 0 ;;
++ exit ;;
+ sun4*:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+@@ -354,10 +367,10 @@
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+- exit 0 ;;
++ exit ;;
+ sun3*:SunOS:*:*)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ sun*:*:4.2BSD:*)
+ UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+ test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+@@ -369,10 +382,10 @@
+ echo sparc-sun-sunos${UNAME_RELEASE}
+ ;;
+ esac
+- exit 0 ;;
++ exit ;;
+ aushp:SunOS:*:*)
+ echo sparc-auspex-sunos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ # The situation for MiNT is a little confusing. The machine name
+ # can be virtually everything (everything which is not
+ # "atarist" or "atariste" at least should have a processor
+@@ -383,37 +396,40 @@
+ # be no problem.
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+ echo m68k-milan-mint${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+ echo m68k-hades-mint${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+ echo m68k-unknown-mint${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
++ m68k:machten:*:*)
++ echo m68k-apple-machten${UNAME_RELEASE}
++ exit ;;
+ powerpc:machten:*:*)
+ echo powerpc-apple-machten${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ RISC*:Mach:*:*)
+ echo mips-dec-mach_bsd4.3
+- exit 0 ;;
++ exit ;;
+ RISC*:ULTRIX:*:*)
+ echo mips-dec-ultrix${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ VAX*:ULTRIX*:*:*)
+ echo vax-dec-ultrix${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ 2020:CLIX:*:* | 2430:CLIX:*:*)
+ echo clipper-intergraph-clix${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ mips:*:*:UMIPS | mips:*:*:RISCos)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+@@ -437,33 +453,33 @@
+ exit (-1);
+ }
+ EOF
+- $CC_FOR_BUILD -o $dummy $dummy.c \
+- && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
+- && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
+- rm -f $dummy.c $dummy && rmdir $tmpdir
++ $CC_FOR_BUILD -o $dummy $dummy.c &&
++ dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
++ SYSTEM_NAME=`$dummy $dummyarg` &&
++ { echo "$SYSTEM_NAME"; exit; }
+ echo mips-mips-riscos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ Motorola:PowerMAX_OS:*:*)
+ echo powerpc-motorola-powermax
+- exit 0 ;;
++ exit ;;
+ Motorola:*:4.3:PL8-*)
+ echo powerpc-harris-powermax
+- exit 0 ;;
+- Night_Hawk:*:*:PowerMAX_OS)
++ exit ;;
++ Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+ echo powerpc-harris-powermax
+- exit 0 ;;
++ exit ;;
+ Night_Hawk:Power_UNIX:*:*)
+ echo powerpc-harris-powerunix
+- exit 0 ;;
++ exit ;;
+ m88k:CX/UX:7*:*)
+ echo m88k-harris-cxux7
+- exit 0 ;;
++ exit ;;
+ m88k:*:4*:R4*)
+ echo m88k-motorola-sysv4
+- exit 0 ;;
++ exit ;;
+ m88k:*:3*:R3*)
+ echo m88k-motorola-sysv3
+- exit 0 ;;
++ exit ;;
+ AViiON:dgux:*:*)
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+@@ -479,29 +495,29 @@
+ else
+ echo i586-dg-dgux${UNAME_RELEASE}
+ fi
+- exit 0 ;;
++ exit ;;
+ M88*:DolphinOS:*:*) # DolphinOS (SVR3)
+ echo m88k-dolphin-sysv3
+- exit 0 ;;
++ exit ;;
+ M88*:*:R3*:*)
+ # Delta 88k system running SVR3
+ echo m88k-motorola-sysv3
+- exit 0 ;;
++ exit ;;
+ XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+ echo m88k-tektronix-sysv3
+- exit 0 ;;
++ exit ;;
+ Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+ echo m68k-tektronix-bsd
+- exit 0 ;;
++ exit ;;
+ *:IRIX*:*:*)
+ echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+- exit 0 ;;
++ exit ;;
+ ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
+- exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX '
++ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
++ exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ i*86:AIX:*:*)
+ echo i386-ibm-aix
+- exit 0 ;;
++ exit ;;
+ ia64:AIX:*:*)
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+@@ -509,7 +525,7 @@
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ fi
+ echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+- exit 0 ;;
++ exit ;;
+ *:AIX:2:3)
+ if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+ eval $set_cc_for_build
+@@ -524,16 +540,19 @@
+ exit(0);
+ }
+ EOF
+- $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
+- rm -f $dummy.c $dummy && rmdir $tmpdir
+- echo rs6000-ibm-aix3.2.5
++ if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
++ then
++ echo "$SYSTEM_NAME"
++ else
++ echo rs6000-ibm-aix3.2.5
++ fi
+ elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+ echo rs6000-ibm-aix3.2.4
+ else
+ echo rs6000-ibm-aix3.2
+ fi
+- exit 0 ;;
+- *:AIX:*:[45])
++ exit ;;
++ *:AIX:*:[456])
+ IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+ if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+ IBM_ARCH=rs6000
+@@ -546,28 +565,28 @@
+ IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ fi
+ echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+- exit 0 ;;
++ exit ;;
+ *:AIX:*:*)
+ echo rs6000-ibm-aix
+- exit 0 ;;
++ exit ;;
+ ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+ echo romp-ibm-bsd4.4
+- exit 0 ;;
++ exit ;;
+ ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
+ echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
+- exit 0 ;; # report: romp-ibm BSD 4.3
++ exit ;; # report: romp-ibm BSD 4.3
+ *:BOSX:*:*)
+ echo rs6000-bull-bosx
+- exit 0 ;;
++ exit ;;
+ DPX/2?00:B.O.S.:*:*)
+ echo m68k-bull-sysv3
+- exit 0 ;;
++ exit ;;
+ 9000/[34]??:4.3bsd:1.*:*)
+ echo m68k-hp-bsd
+- exit 0 ;;
++ exit ;;
+ hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+ echo m68k-hp-bsd4.4
+- exit 0 ;;
++ exit ;;
+ 9000/[34678]??:HP-UX:*:*)
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ case "${UNAME_MACHINE}" in
+@@ -624,16 +643,36 @@
+ }
+ EOF
+ (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+- if test -z "$HP_ARCH"; then HP_ARCH=hppa; fi
+- rm -f $dummy.c $dummy && rmdir $tmpdir
++ test -z "$HP_ARCH" && HP_ARCH=hppa
+ fi ;;
+ esac
++ if [ ${HP_ARCH} = "hppa2.0w" ]
++ then
++ eval $set_cc_for_build
++
++ # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
++ # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
++ # generating 64-bit code. GNU and HP use different nomenclature:
++ #
++ # $ CC_FOR_BUILD=cc ./config.guess
++ # => hppa2.0w-hp-hpux11.23
++ # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
++ # => hppa64-hp-hpux11.23
++
++ if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
++ grep -q __LP64__
++ then
++ HP_ARCH="hppa2.0w"
++ else
++ HP_ARCH="hppa64"
++ fi
++ fi
+ echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+- exit 0 ;;
++ exit ;;
+ ia64:HP-UX:*:*)
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ echo ia64-hp-hpux${HPUX_REV}
+- exit 0 ;;
++ exit ;;
+ 3050*:HI-UX:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+@@ -661,186 +700,248 @@
+ exit (0);
+ }
+ EOF
+- $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
+- rm -f $dummy.c $dummy && rmdir $tmpdir
++ $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
++ { echo "$SYSTEM_NAME"; exit; }
+ echo unknown-hitachi-hiuxwe2
+- exit 0 ;;
++ exit ;;
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+ echo hppa1.1-hp-bsd
+- exit 0 ;;
++ exit ;;
+ 9000/8??:4.3bsd:*:*)
+ echo hppa1.0-hp-bsd
+- exit 0 ;;
++ exit ;;
+ *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+ echo hppa1.0-hp-mpeix
+- exit 0 ;;
++ exit ;;
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+ echo hppa1.1-hp-osf
+- exit 0 ;;
++ exit ;;
+ hp8??:OSF1:*:*)
+ echo hppa1.0-hp-osf
+- exit 0 ;;
++ exit ;;
+ i*86:OSF1:*:*)
+ if [ -x /usr/sbin/sysversion ] ; then
+ echo ${UNAME_MACHINE}-unknown-osf1mk
+ else
+ echo ${UNAME_MACHINE}-unknown-osf1
+ fi
+- exit 0 ;;
++ exit ;;
+ parisc*:Lites*:*:*)
+ echo hppa1.1-hp-lites
+- exit 0 ;;
++ exit ;;
+ C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+ echo c1-convex-bsd
+- exit 0 ;;
++ exit ;;
+ C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+- exit 0 ;;
++ exit ;;
+ C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+ echo c34-convex-bsd
+- exit 0 ;;
++ exit ;;
+ C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+ echo c38-convex-bsd
+- exit 0 ;;
++ exit ;;
+ C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+ echo c4-convex-bsd
+- exit 0 ;;
++ exit ;;
+ CRAY*Y-MP:*:*:*)
+ echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+- exit 0 ;;
++ exit ;;
+ CRAY*[A-Z]90:*:*:*)
+ echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+ -e 's/\.[^.]*$/.X/'
+- exit 0 ;;
++ exit ;;
+ CRAY*TS:*:*:*)
+ echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+- exit 0 ;;
+- CRAY*T3D:*:*:*)
+- echo alpha-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+- exit 0 ;;
++ exit ;;
+ CRAY*T3E:*:*:*)
+ echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+- exit 0 ;;
++ exit ;;
+ CRAY*SV1:*:*:*)
+ echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+- exit 0 ;;
++ exit ;;
++ *:UNICOS/mp:*:*)
++ echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
++ exit ;;
+ F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+ FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+- exit 0 ;;
++ exit ;;
++ 5000:UNIX_System_V:4.*:*)
++ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
++ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
++ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
++ exit ;;
+ i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+ echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ sparc*:BSD/OS:*:*)
+ echo sparc-unknown-bsdi${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:BSD/OS:*:*)
+ echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:FreeBSD:*:*)
+- # Determine whether the default compiler uses glibc.
+- eval $set_cc_for_build
+- sed 's/^ //' << EOF >$dummy.c
+- #include <features.h>
+- #if __GLIBC__ >= 2
+- LIBC=gnu
+- #else
+- LIBC=
+- #endif
+-EOF
+- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
+- rm -f $dummy.c && rmdir $tmpdir
+- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC}
+- exit 0 ;;
++ case ${UNAME_MACHINE} in
++ pc98)
++ echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
++ amd64)
++ echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
++ *)
++ echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
++ esac
++ exit ;;
+ i*:CYGWIN*:*)
+ echo ${UNAME_MACHINE}-pc-cygwin
+- exit 0 ;;
+- i*:MINGW*:*)
++ exit ;;
++ *:MINGW*:*)
+ echo ${UNAME_MACHINE}-pc-mingw32
+- exit 0 ;;
++ exit ;;
++ i*:windows32*:*)
++ # uname -m includes "-pc" on this system.
++ echo ${UNAME_MACHINE}-mingw32
++ exit ;;
+ i*:PW*:*)
+ echo ${UNAME_MACHINE}-pc-pw32
+- exit 0 ;;
+- x86:Interix*:3*)
+- echo i386-pc-interix3
+- exit 0 ;;
++ exit ;;
++ *:Interix*:*)
++ case ${UNAME_MACHINE} in
++ x86)
++ echo i586-pc-interix${UNAME_RELEASE}
++ exit ;;
++ authenticamd | genuineintel | EM64T)
++ echo x86_64-unknown-interix${UNAME_RELEASE}
++ exit ;;
++ IA64)
++ echo ia64-unknown-interix${UNAME_RELEASE}
++ exit ;;
++ esac ;;
++ [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
++ echo i${UNAME_MACHINE}-pc-mks
++ exit ;;
++ 8664:Windows_NT:*)
++ echo x86_64-pc-mks
++ exit ;;
+ i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+ # UNAME_MACHINE based on the output of uname instead of i386?
+- echo i386-pc-interix
+- exit 0 ;;
++ echo i586-pc-interix
++ exit ;;
+ i*:UWIN*:*)
+ echo ${UNAME_MACHINE}-pc-uwin
+- exit 0 ;;
++ exit ;;
++ amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
++ echo x86_64-unknown-cygwin
++ exit ;;
+ p*:CYGWIN*:*)
+ echo powerpcle-unknown-cygwin
+- exit 0 ;;
++ exit ;;
+ prep*:SunOS:5.*:*)
+ echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+- exit 0 ;;
++ exit ;;
+ *:GNU:*:*)
++ # the GNU system
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+- exit 0 ;;
++ exit ;;
++ *:GNU/*:*:*)
++ # other systems with GNU libc and userland
++ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
++ exit ;;
+ i*86:Minix:*:*)
+ echo ${UNAME_MACHINE}-pc-minix
+- exit 0 ;;
++ exit ;;
++ alpha:Linux:*:*)
++ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
++ EV5) UNAME_MACHINE=alphaev5 ;;
++ EV56) UNAME_MACHINE=alphaev56 ;;
++ PCA56) UNAME_MACHINE=alphapca56 ;;
++ PCA57) UNAME_MACHINE=alphapca56 ;;
++ EV6) UNAME_MACHINE=alphaev6 ;;
++ EV67) UNAME_MACHINE=alphaev67 ;;
++ EV68*) UNAME_MACHINE=alphaev68 ;;
++ esac
++ objdump --private-headers /bin/sh | grep -q ld.so.1
++ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
++ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
++ exit ;;
+ arm*:Linux:*:*)
++ eval $set_cc_for_build
++ if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
++ | grep -q __ARM_EABI__
++ then
++ echo ${UNAME_MACHINE}-unknown-linux-gnu
++ else
++ echo ${UNAME_MACHINE}-unknown-linux-gnueabi
++ fi
++ exit ;;
++ avr32*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+- exit 0 ;;
++ exit ;;
++ cris:Linux:*:*)
++ echo cris-axis-linux-gnu
++ exit ;;
++ crisv32:Linux:*:*)
++ echo crisv32-axis-linux-gnu
++ exit ;;
++ frv:Linux:*:*)
++ echo frv-unknown-linux-gnu
++ exit ;;
++ i*86:Linux:*:*)
++ LIBC=gnu
++ eval $set_cc_for_build
++ sed 's/^ //' << EOF >$dummy.c
++ #ifdef __dietlibc__
++ LIBC=dietlibc
++ #endif
++EOF
++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
++ echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
++ exit ;;
+ ia64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+- exit 0 ;;
++ exit ;;
++ m32r*:Linux:*:*)
++ echo ${UNAME_MACHINE}-unknown-linux-gnu
++ exit ;;
+ m68*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+- exit 0 ;;
+- mips:Linux:*:*)
++ exit ;;
++ mips:Linux:*:* | mips64:Linux:*:*)
+ eval $set_cc_for_build
+ sed 's/^ //' << EOF >$dummy.c
+ #undef CPU
+- #undef mips
+- #undef mipsel
++ #undef ${UNAME_MACHINE}
++ #undef ${UNAME_MACHINE}el
+ #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+- CPU=mipsel
++ CPU=${UNAME_MACHINE}el
+ #else
+ #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+- CPU=mips
++ CPU=${UNAME_MACHINE}
+ #else
+ CPU=
+ #endif
+ #endif
+ EOF
+- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
+- rm -f $dummy.c && rmdir $tmpdir
+- test x"${CPU}" != x && echo "${CPU}-pc-linux-gnu" && exit 0
++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
++ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
+ ;;
+- ppc:Linux:*:*)
+- echo powerpc-unknown-linux-gnu
+- exit 0 ;;
+- ppc64:Linux:*:*)
+- echo powerpc64-unknown-linux-gnu
+- exit 0 ;;
+- alpha:Linux:*:*)
+- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+- EV5) UNAME_MACHINE=alphaev5 ;;
+- EV56) UNAME_MACHINE=alphaev56 ;;
+- PCA56) UNAME_MACHINE=alphapca56 ;;
+- PCA57) UNAME_MACHINE=alphapca56 ;;
+- EV6) UNAME_MACHINE=alphaev6 ;;
+- EV67) UNAME_MACHINE=alphaev67 ;;
+- EV68*) UNAME_MACHINE=alphaev68 ;;
+- esac
+- objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
+- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+- exit 0 ;;
++ or32:Linux:*:*)
++ echo or32-unknown-linux-gnu
++ exit ;;
++ padre:Linux:*:*)
++ echo sparc-unknown-linux-gnu
++ exit ;;
++ parisc64:Linux:*:* | hppa64:Linux:*:*)
++ echo hppa64-unknown-linux-gnu
++ exit ;;
+ parisc:Linux:*:* | hppa:Linux:*:*)
+ # Look for CPU level
+ case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+@@ -848,82 +949,40 @@
+ PA8*) echo hppa2.0-unknown-linux-gnu ;;
+ *) echo hppa-unknown-linux-gnu ;;
+ esac
+- exit 0 ;;
+- parisc64:Linux:*:* | hppa64:Linux:*:*)
+- echo hppa64-unknown-linux-gnu
+- exit 0 ;;
++ exit ;;
++ ppc64:Linux:*:*)
++ echo powerpc64-unknown-linux-gnu
++ exit ;;
++ ppc:Linux:*:*)
++ echo powerpc-unknown-linux-gnu
++ exit ;;
+ s390:Linux:*:* | s390x:Linux:*:*)
+ echo ${UNAME_MACHINE}-ibm-linux
+- exit 0 ;;
++ exit ;;
++ sh64*:Linux:*:*)
++ echo ${UNAME_MACHINE}-unknown-linux-gnu
++ exit ;;
+ sh*:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+- exit 0 ;;
++ exit ;;
+ sparc:Linux:*:* | sparc64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+- exit 0 ;;
++ exit ;;
++ vax:Linux:*:*)
++ echo ${UNAME_MACHINE}-dec-linux-gnu
++ exit ;;
+ x86_64:Linux:*:*)
+ echo x86_64-unknown-linux-gnu
+- exit 0 ;;
+- i*86:Linux:*:*)
+- # The BFD linker knows what the default object file format is, so
+- # first see if it will tell us. cd to the root directory to prevent
+- # problems with other programs or directories called `ld' in the path.
+- # Set LC_ALL=C to ensure ld outputs messages in English.
+- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
+- | sed -ne '/supported targets:/!d
+- s/[ ][ ]*/ /g
+- s/.*supported targets: *//
+- s/ .*//
+- p'`
+- case "$ld_supported_targets" in
+- elf32-i386)
+- TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
+- ;;
+- a.out-i386-linux)
+- echo "${UNAME_MACHINE}-pc-linux-gnuaout"
+- exit 0 ;;
+- coff-i386)
+- echo "${UNAME_MACHINE}-pc-linux-gnucoff"
+- exit 0 ;;
+- "")
+- # Either a pre-BFD a.out linker (linux-gnuoldld) or
+- # one that does not give us useful --help.
+- echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
+- exit 0 ;;
+- esac
+- # Determine whether the default compiler is a.out or elf
+- eval $set_cc_for_build
+- sed 's/^ //' << EOF >$dummy.c
+- #include <features.h>
+- #ifdef __ELF__
+- # ifdef __GLIBC__
+- # if __GLIBC__ >= 2
+- LIBC=gnu
+- # else
+- LIBC=gnulibc1
+- # endif
+- # else
+- LIBC=gnulibc1
+- # endif
+- #else
+- #ifdef __INTEL_COMPILER
+- LIBC=gnu
+- #else
+- LIBC=gnuaout
+- #endif
+- #endif
+-EOF
+- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
+- rm -f $dummy.c && rmdir $tmpdir
+- test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0
+- test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0
+- ;;
++ exit ;;
++ xtensa*:Linux:*:*)
++ echo ${UNAME_MACHINE}-unknown-linux-gnu
++ exit ;;
+ i*86:DYNIX/ptx:4*:*)
+ # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+ # earlier versions are messed up and put the nodename in both
+ # sysname and nodename.
+ echo i386-sequent-sysv4
+- exit 0 ;;
++ exit ;;
+ i*86:UNIX_SV:4.2MP:2.*)
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+@@ -931,7 +990,27 @@
+ # I just have to hope. -- rms.
+ # Use sysv4.2uw... so that sysv4* matches it.
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+- exit 0 ;;
++ exit ;;
++ i*86:OS/2:*:*)
++ # If we were able to find `uname', then EMX Unix compatibility
++ # is probably installed.
++ echo ${UNAME_MACHINE}-pc-os2-emx
++ exit ;;
++ i*86:XTS-300:*:STOP)
++ echo ${UNAME_MACHINE}-unknown-stop
++ exit ;;
++ i*86:atheos:*:*)
++ echo ${UNAME_MACHINE}-unknown-atheos
++ exit ;;
++ i*86:syllable:*:*)
++ echo ${UNAME_MACHINE}-pc-syllable
++ exit ;;
++ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
++ echo i386-unknown-lynxos${UNAME_RELEASE}
++ exit ;;
++ i*86:*DOS:*:*)
++ echo ${UNAME_MACHINE}-pc-msdosdjgpp
++ exit ;;
+ i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+ UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+ if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+@@ -939,15 +1018,16 @@
+ else
+ echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+ fi
+- exit 0 ;;
+- i*86:*:5:[78]*)
++ exit ;;
++ i*86:*:5:[678]*)
++ # UnixWare 7.x, OpenUNIX and OpenServer 6.
+ case `/bin/uname -X | grep "^Machine"` in
+ *486*) UNAME_MACHINE=i486 ;;
+ *Pentium) UNAME_MACHINE=i586 ;;
+ *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+ esac
+ echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+- exit 0 ;;
++ exit ;;
+ i*86:*:3.2:*)
+ if test -f /usr/options/cb.name; then
+ UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+@@ -965,76 +1045,86 @@
+ else
+ echo ${UNAME_MACHINE}-pc-sysv32
+ fi
+- exit 0 ;;
+- i*86:*DOS:*:*)
+- echo ${UNAME_MACHINE}-pc-msdosdjgpp
+- exit 0 ;;
++ exit ;;
+ pc:*:*:*)
+ # Left here for compatibility:
+ # uname -m prints for DJGPP always 'pc', but it prints nothing about
+- # the processor, so we play safe by assuming i386.
+- echo i386-pc-msdosdjgpp
+- exit 0 ;;
++ # the processor, so we play safe by assuming i586.
++ # Note: whatever this is, it MUST be the same as what config.sub
++ # prints for the "djgpp" host, or else GDB configury will decide that
++ # this is a cross-build.
++ echo i586-pc-msdosdjgpp
++ exit ;;
+ Intel:Mach:3*:*)
+ echo i386-pc-mach3
+- exit 0 ;;
++ exit ;;
+ paragon:*:*:*)
+ echo i860-intel-osf1
+- exit 0 ;;
++ exit ;;
+ i860:*:4.*:*) # i860-SVR4
+ if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+ echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+ else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
+ fi
+- exit 0 ;;
++ exit ;;
+ mini*:CTIX:SYS*5:*)
+ # "miniframe"
+ echo m68010-convergent-sysv
+- exit 0 ;;
++ exit ;;
+ mc68k:UNIX:SYSTEM5:3.51m)
+ echo m68k-convergent-sysv
+- exit 0 ;;
+- M68*:*:R3V[567]*:*)
+- test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
+- 3[34]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0)
++ exit ;;
++ M680?0:D-NIX:5.3:*)
++ echo m68k-diab-dnix
++ exit ;;
++ M68*:*:R3V[5678]*:*)
++ test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
++ 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
+ OS_REL=''
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+- && echo i486-ncr-sysv4.3${OS_REL} && exit 0
++ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+- && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
++ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+- && echo i486-ncr-sysv4 && exit 0 ;;
++ && { echo i486-ncr-sysv4; exit; } ;;
++ NCR*:*:4.2:* | MPRAS*:*:4.2:*)
++ OS_REL='.3'
++ test -r /etc/.relid \
++ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
++ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
++ && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
++ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
++ && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
++ /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
++ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+ echo m68k-unknown-lynxos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ mc68030:UNIX_System_V:4.*:*)
+ echo m68k-atari-sysv4
+- exit 0 ;;
+- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+- echo i386-unknown-lynxos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ TSUNAMI:LynxOS:2.*:*)
+ echo sparc-unknown-lynxos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ rs6000:LynxOS:2.*:*)
+ echo rs6000-unknown-lynxos${UNAME_RELEASE}
+- exit 0 ;;
+- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
++ exit ;;
++ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
+ echo powerpc-unknown-lynxos${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ SM[BE]S:UNIX_SV:*:*)
+ echo mips-dde-sysv${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ RM*:ReliantUNIX-*:*:*)
+ echo mips-sni-sysv4
+- exit 0 ;;
++ exit ;;
+ RM*:SINIX-*:*:*)
+ echo mips-sni-sysv4
+- exit 0 ;;
++ exit ;;
+ *:SINIX-*:*:*)
+ if uname -p 2>/dev/null >/dev/null ; then
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+@@ -1042,64 +1132,94 @@
+ else
+ echo ns32k-sni-sysv
+ fi
+- exit 0 ;;
++ exit ;;
+ PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+- exit 0 ;;
++ exit ;;
+ *:UNIX_System_V:4*:FTX*)
+ # From Gerald Hewes <hewes@openmarket.com>.
+ # How about differentiating between stratus architectures? -djm
+ echo hppa1.1-stratus-sysv4
+- exit 0 ;;
++ exit ;;
+ *:*:*:FTX*)
+ # From seanf@swdc.stratus.com.
+ echo i860-stratus-sysv4
+- exit 0 ;;
++ exit ;;
++ i*86:VOS:*:*)
++ # From Paul.Green@stratus.com.
++ echo ${UNAME_MACHINE}-stratus-vos
++ exit ;;
+ *:VOS:*:*)
+ # From Paul.Green@stratus.com.
+ echo hppa1.1-stratus-vos
+- exit 0 ;;
++ exit ;;
+ mc68*:A/UX:*:*)
+ echo m68k-apple-aux${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ news*:NEWS-OS:6*:*)
+ echo mips-sony-newsos6
+- exit 0 ;;
++ exit ;;
+ R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+ if [ -d /usr/nec ]; then
+ echo mips-nec-sysv${UNAME_RELEASE}
+ else
+ echo mips-unknown-sysv${UNAME_RELEASE}
+ fi
+- exit 0 ;;
++ exit ;;
+ BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
+ echo powerpc-be-beos
+- exit 0 ;;
++ exit ;;
+ BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
+ echo powerpc-apple-beos
+- exit 0 ;;
++ exit ;;
+ BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos
+- exit 0 ;;
++ exit ;;
++ BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
++ echo i586-pc-haiku
++ exit ;;
+ SX-4:SUPER-UX:*:*)
+ echo sx4-nec-superux${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ SX-5:SUPER-UX:*:*)
+ echo sx5-nec-superux${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ SX-6:SUPER-UX:*:*)
+ echo sx6-nec-superux${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
++ SX-7:SUPER-UX:*:*)
++ echo sx7-nec-superux${UNAME_RELEASE}
++ exit ;;
++ SX-8:SUPER-UX:*:*)
++ echo sx8-nec-superux${UNAME_RELEASE}
++ exit ;;
++ SX-8R:SUPER-UX:*:*)
++ echo sx8r-nec-superux${UNAME_RELEASE}
++ exit ;;
+ Power*:Rhapsody:*:*)
+ echo powerpc-apple-rhapsody${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:Rhapsody:*:*)
+ echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:Darwin:*:*)
+- echo `uname -p`-apple-darwin${UNAME_RELEASE}
+- exit 0 ;;
++ UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
++ case $UNAME_PROCESSOR in
++ i386)
++ eval $set_cc_for_build
++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
++ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
++ grep IS_64BIT_ARCH >/dev/null
++ then
++ UNAME_PROCESSOR="x86_64"
++ fi
++ fi ;;
++ unknown) UNAME_PROCESSOR=powerpc ;;
++ esac
++ echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
++ exit ;;
+ *:procnto*:*:* | *:QNX:[0123456789]*:*)
+ UNAME_PROCESSOR=`uname -p`
+ if test "$UNAME_PROCESSOR" = "x86"; then
+@@ -1107,22 +1227,25 @@
+ UNAME_MACHINE=pc
+ fi
+ echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:QNX:*:4*)
+ echo i386-pc-qnx
+- exit 0 ;;
+- NSR-[DGKLNPTVW]:NONSTOP_KERNEL:*:*)
++ exit ;;
++ NSE-?:NONSTOP_KERNEL:*:*)
++ echo nse-tandem-nsk${UNAME_RELEASE}
++ exit ;;
++ NSR-?:NONSTOP_KERNEL:*:*)
+ echo nsr-tandem-nsk${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:NonStop-UX:*:*)
+ echo mips-compaq-nonstopux
+- exit 0 ;;
++ exit ;;
+ BS2000:POSIX*:*:*)
+ echo bs2000-siemens-sysv
+- exit 0 ;;
++ exit ;;
+ DS/*:UNIX_System_V:*:*)
+ echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+- exit 0 ;;
++ exit ;;
+ *:Plan9:*:*)
+ # "uname -m" is not consistent, so use $cputype instead. 386
+ # is converted to i386 for consistency with other x86
+@@ -1133,36 +1256,50 @@
+ UNAME_MACHINE="$cputype"
+ fi
+ echo ${UNAME_MACHINE}-unknown-plan9
+- exit 0 ;;
+- i*86:OS/2:*:*)
+- # If we were able to find `uname', then EMX Unix compatibility
+- # is probably installed.
+- echo ${UNAME_MACHINE}-pc-os2-emx
+- exit 0 ;;
++ exit ;;
+ *:TOPS-10:*:*)
+ echo pdp10-unknown-tops10
+- exit 0 ;;
++ exit ;;
+ *:TENEX:*:*)
+ echo pdp10-unknown-tenex
+- exit 0 ;;
++ exit ;;
+ KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+ echo pdp10-dec-tops20
+- exit 0 ;;
++ exit ;;
+ XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+ echo pdp10-xkl-tops20
+- exit 0 ;;
++ exit ;;
+ *:TOPS-20:*:*)
+ echo pdp10-unknown-tops20
+- exit 0 ;;
++ exit ;;
+ *:ITS:*:*)
+ echo pdp10-unknown-its
+- exit 0 ;;
+- i*86:XTS-300:*:STOP)
+- echo ${UNAME_MACHINE}-unknown-stop
+- exit 0 ;;
+- i*86:atheos:*:*)
+- echo ${UNAME_MACHINE}-unknown-atheos
+- exit 0 ;;
++ exit ;;
++ SEI:*:*:SEIUX)
++ echo mips-sei-seiux${UNAME_RELEASE}
++ exit ;;
++ *:DragonFly:*:*)
++ echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
++ exit ;;
++ *:*VMS:*:*)
++ UNAME_MACHINE=`(uname -p) 2>/dev/null`
++ case "${UNAME_MACHINE}" in
++ A*) echo alpha-dec-vms ; exit ;;
++ I*) echo ia64-dec-vms ; exit ;;
++ V*) echo vax-dec-vms ; exit ;;
++ esac ;;
++ *:XENIX:*:SysV)
++ echo i386-pc-xenix
++ exit ;;
++ i*86:skyos:*:*)
++ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
++ exit ;;
++ i*86:rdos:*:*)
++ echo ${UNAME_MACHINE}-pc-rdos
++ exit ;;
++ i*86:AROS:*:*)
++ echo ${UNAME_MACHINE}-pc-aros
++ exit ;;
+ esac
+
+ #echo '(No uname command or uname output not recognized.)' 1>&2
+@@ -1194,7 +1331,7 @@
+ #endif
+
+ #if defined (__arm) && defined (__acorn) && defined (__unix)
+- printf ("arm-acorn-riscix"); exit (0);
++ printf ("arm-acorn-riscix\n"); exit (0);
+ #endif
+
+ #if defined (hp300) && !defined (hpux)
+@@ -1283,12 +1420,12 @@
+ }
+ EOF
+
+-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && rm -f $dummy.c $dummy && rmdir $tmpdir && exit 0
+-rm -f $dummy.c $dummy && rmdir $tmpdir
++$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
++ { echo "$SYSTEM_NAME"; exit; }
+
+ # Apollos put the system type in the environment.
+
+-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
++test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+ # Convex versions that predate uname can use getsysinfo(1)
+
+@@ -1297,22 +1434,22 @@
+ case `getsysinfo -f cpu_type` in
+ c1*)
+ echo c1-convex-bsd
+- exit 0 ;;
++ exit ;;
+ c2*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+- exit 0 ;;
++ exit ;;
+ c34*)
+ echo c34-convex-bsd
+- exit 0 ;;
++ exit ;;
+ c38*)
+ echo c38-convex-bsd
+- exit 0 ;;
++ exit ;;
+ c4*)
+ echo c4-convex-bsd
+- exit 0 ;;
++ exit ;;
+ esac
+ fi
+
+@@ -1323,7 +1460,9 @@
+ the operating system you are using. It is advised that you
+ download the most up to date version of the config scripts from
+
+- ftp://ftp.gnu.org/pub/gnu/config/
++ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
++and
++ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+ If the version you run ($0) is already up to date, please
+ send the following data and any information you think might be
+--- misc/libtextcat-2.2/config.sub 2010-04-15 09:20:04.000000000 +0000
++++ misc/build/libtextcat-2.2/config.sub 2010-04-15 09:20:41.000000000 +0000
+@@ -1,9 +1,10 @@
+ #! /bin/sh
+ # Configuration validation subroutine script.
+ # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+-# 2000, 2001, 2002 Free Software Foundation, Inc.
++# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
++# Free Software Foundation, Inc.
+
+-timestamp='2002-09-05'
++timestamp='2010-01-22'
+
+ # This file is (in principle) common to ALL GNU software.
+ # The presence of a machine in this file suggests that SOME GNU software
+@@ -21,22 +22,26 @@
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+-# Foundation, Inc., 59 Temple Place - Suite 330,
+-# Boston, MA 02111-1307, USA.
+-
++# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
++# 02110-1301, USA.
++#
+ # As a special exception to the GNU General Public License, if you
+ # distribute this file as part of a program that contains a
+ # configuration script generated by Autoconf, you may include it under
+ # the same distribution terms that you use for the rest of that program.
+
++
+ # Please send patches to <config-patches@gnu.org>. Submit a context
+-# diff and a properly formatted ChangeLog entry.
++# diff and a properly formatted GNU ChangeLog entry.
+ #
+ # Configuration subroutine to validate and canonicalize a configuration type.
+ # Supply the specified configuration type as an argument.
+ # If it is invalid, we print an error message on stderr and exit with code 1.
+ # Otherwise, we print the canonical config type on stdout and succeed.
+
++# You can get the latest version of this script from:
++# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
++
+ # This file is supposed to be the same for all GNU packages
+ # and recognize all the CPU types, system types and aliases
+ # that are meaningful with *any* GNU software.
+@@ -70,8 +75,9 @@
+ version="\
+ GNU config.sub ($timestamp)
+
+-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+-Free Software Foundation, Inc.
++Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
++2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
++Software Foundation, Inc.
+
+ This is free software; see the source for copying conditions. There is NO
+ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+@@ -83,11 +89,11 @@
+ while test $# -gt 0 ; do
+ case $1 in
+ --time-stamp | --time* | -t )
+- echo "$timestamp" ; exit 0 ;;
++ echo "$timestamp" ; exit ;;
+ --version | -v )
+- echo "$version" ; exit 0 ;;
++ echo "$version" ; exit ;;
+ --help | --h* | -h )
+- echo "$usage"; exit 0 ;;
++ echo "$usage"; exit ;;
+ -- ) # Stop option processing
+ shift; break ;;
+ - ) # Use stdin as input.
+@@ -99,7 +105,7 @@
+ *local*)
+ # First pass through any local machine types.
+ echo $1
+- exit 0;;
++ exit ;;
+
+ * )
+ break ;;
+@@ -118,7 +124,10 @@
+ # Here we must recognize all the valid KERNEL-OS combinations.
+ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+ case $maybe_os in
+- nto-qnx* | linux-gnu* | freebsd*-gnu* | storm-chaos* | os2-emx* | windows32-* | rtmk-nova*)
++ nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
++ uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
++ kopensolaris*-gnu* | \
++ storm-chaos* | os2-emx* | rtmk-nova*)
+ os=-$maybe_os
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+ ;;
+@@ -144,10 +153,13 @@
+ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+- -apple | -axis)
++ -apple | -axis | -knuth | -cray | -microblaze)
+ os=
+ basic_machine=$1
+ ;;
++ -bluegene*)
++ os=-cnk
++ ;;
+ -sim | -cisco | -oki | -wec | -winbond)
+ os=
+ basic_machine=$1
+@@ -169,6 +181,10 @@
+ -hiux*)
+ os=-hiuxwe2
+ ;;
++ -sco6)
++ os=-sco5v6
++ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
++ ;;
+ -sco5)
+ os=-sco3.2v5
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+@@ -185,6 +201,10 @@
+ # Don't forget version if it is 3.2v4 or newer.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
++ -sco5v6*)
++ # Don't forget version if it is 3.2v4 or newer.
++ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
++ ;;
+ -sco*)
+ os=-sco3.2v2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+@@ -228,51 +248,71 @@
+ | a29k \
+ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+- | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
+- | clipper \
++ | am33_2.0 \
++ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
++ | bfin \
++ | c4x | clipper \
+ | d10v | d30v | dlx | dsp16xx \
+- | fr30 | frv \
++ | fido | fr30 | frv \
+ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+ | i370 | i860 | i960 | ia64 \
+- | ip2k \
+- | m32r | m68000 | m68k | m88k | mcore \
++ | ip2k | iq2000 \
++ | lm32 \
++ | m32c | m32r | m32rle | m68000 | m68k | m88k \
++ | maxq | mb | microblaze | mcore | mep | metag \
+ | mips | mipsbe | mipseb | mipsel | mipsle \
+ | mips16 \
+ | mips64 | mips64el \
+- | mips64vr | mips64vrel \
++ | mips64octeon | mips64octeonel \
+ | mips64orion | mips64orionel \
++ | mips64r5900 | mips64r5900el \
++ | mips64vr | mips64vrel \
+ | mips64vr4100 | mips64vr4100el \
+ | mips64vr4300 | mips64vr4300el \
+ | mips64vr5000 | mips64vr5000el \
++ | mips64vr5900 | mips64vr5900el \
+ | mipsisa32 | mipsisa32el \
++ | mipsisa32r2 | mipsisa32r2el \
+ | mipsisa64 | mipsisa64el \
++ | mipsisa64r2 | mipsisa64r2el \
+ | mipsisa64sb1 | mipsisa64sb1el \
+ | mipsisa64sr71k | mipsisa64sr71kel \
+ | mipstx39 | mipstx39el \
+ | mn10200 | mn10300 \
++ | moxie \
++ | mt \
++ | msp430 \
++ | nios | nios2 \
+ | ns16k | ns32k \
+- | openrisc | or32 \
++ | or32 \
+ | pdp10 | pdp11 | pj | pjl \
+ | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+ | pyramid \
+- | sh | sh[1234] | sh3e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
++ | rx \
++ | score \
++ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+ | sh64 | sh64le \
+- | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \
+- | strongarm \
+- | tahoe | thumb | tic80 | tron \
++ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
++ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
++ | spu | strongarm \
++ | tahoe | thumb | tic4x | tic80 | tron \
++ | ubicom32 \
+ | v850 | v850e \
+ | we32k \
+- | x86 | xscale | xstormy16 | xtensa \
+- | z8k)
++ | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
++ | z8k | z80)
+ basic_machine=$basic_machine-unknown
+ ;;
+- m6811 | m68hc11 | m6812 | m68hc12)
++ m6811 | m68hc11 | m6812 | m68hc12 | picochip)
+ # Motorola 68HC11/12.
+ basic_machine=$basic_machine-unknown
+ os=-none
+ ;;
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+ ;;
++ ms1)
++ basic_machine=mt-unknown
++ ;;
+
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+@@ -292,50 +332,69 @@
+ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
+ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+- | avr-* \
+- | bs2000-* \
+- | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* \
+- | clipper-* | cydra-* \
++ | avr-* | avr32-* \
++ | bfin-* | bs2000-* \
++ | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
++ | clipper-* | craynv-* | cydra-* \
+ | d10v-* | d30v-* | dlx-* \
+ | elxsi-* \
+- | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
++ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+ | h8300-* | h8500-* \
+ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+ | i*86-* | i860-* | i960-* | ia64-* \
+- | ip2k-* \
+- | m32r-* \
++ | ip2k-* | iq2000-* \
++ | lm32-* \
++ | m32c-* | m32r-* | m32rle-* \
+ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+- | m88110-* | m88k-* | mcore-* \
++ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
+ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+ | mips16-* \
+ | mips64-* | mips64el-* \
+- | mips64vr-* | mips64vrel-* \
++ | mips64octeon-* | mips64octeonel-* \
+ | mips64orion-* | mips64orionel-* \
++ | mips64r5900-* | mips64r5900el-* \
++ | mips64vr-* | mips64vrel-* \
+ | mips64vr4100-* | mips64vr4100el-* \
+ | mips64vr4300-* | mips64vr4300el-* \
+ | mips64vr5000-* | mips64vr5000el-* \
++ | mips64vr5900-* | mips64vr5900el-* \
+ | mipsisa32-* | mipsisa32el-* \
++ | mipsisa32r2-* | mipsisa32r2el-* \
+ | mipsisa64-* | mipsisa64el-* \
++ | mipsisa64r2-* | mipsisa64r2el-* \
+ | mipsisa64sb1-* | mipsisa64sb1el-* \
+ | mipsisa64sr71k-* | mipsisa64sr71kel-* \
+- | mipstx39 | mipstx39el \
++ | mipstx39-* | mipstx39el-* \
++ | mmix-* \
++ | mt-* \
++ | msp430-* \
++ | nios-* | nios2-* \
+ | none-* | np1-* | ns16k-* | ns32k-* \
+ | orion-* \
+ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+ | pyramid-* \
+- | romp-* | rs6000-* \
+- | sh-* | sh[1234]-* | sh3e-* | sh[34]eb-* | shbe-* \
++ | romp-* | rs6000-* | rx-* \
++ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+- | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \
+- | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
+- | tahoe-* | thumb-* | tic30-* | tic4x-* | tic54x-* | tic80-* | tron-* \
++ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
++ | sparclite-* \
++ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
++ | tahoe-* | thumb-* \
++ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
++ | tile-* | tilegx-* \
++ | tron-* \
++ | ubicom32-* \
+ | v850-* | v850e-* | vax-* \
+ | we32k-* \
+- | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \
+- | xtensa-* \
++ | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
++ | xstormy16-* | xtensa*-* \
+ | ymp-* \
+- | z8k-*)
++ | z8k-* | z80-*)
++ ;;
++ # Recognize the basic CPU types without company name, with glob match.
++ xtensa*)
++ basic_machine=$basic_machine-unknown
+ ;;
+ # Recognize the various machine names and aliases which stand
+ # for a CPU type and a company and sometimes even an OS.
+@@ -353,6 +412,9 @@
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
++ abacus)
++ basic_machine=abacus-unknown
++ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ os=-scout
+@@ -367,6 +429,12 @@
+ basic_machine=a29k-none
+ os=-bsd
+ ;;
++ amd64)
++ basic_machine=x86_64-pc
++ ;;
++ amd64-*)
++ basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
++ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ os=-sysv
+@@ -390,6 +458,10 @@
+ basic_machine=m68k-apollo
+ os=-bsd
+ ;;
++ aros)
++ basic_machine=i386-pc
++ os=-aros
++ ;;
+ aux)
+ basic_machine=m68k-apple
+ os=-aux
+@@ -398,10 +470,26 @@
+ basic_machine=ns32k-sequent
+ os=-dynix
+ ;;
++ blackfin)
++ basic_machine=bfin-unknown
++ os=-linux
++ ;;
++ blackfin-*)
++ basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
++ os=-linux
++ ;;
++ bluegene*)
++ basic_machine=powerpc-ibm
++ os=-cnk
++ ;;
+ c90)
+ basic_machine=c90-cray
+ os=-unicos
+ ;;
++ cegcc)
++ basic_machine=arm-unknown
++ os=-cegcc
++ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ os=-bsd
+@@ -426,12 +514,27 @@
+ basic_machine=j90-cray
+ os=-unicos
+ ;;
++ craynv)
++ basic_machine=craynv-cray
++ os=-unicosmp
++ ;;
++ cr16)
++ basic_machine=cr16-unknown
++ os=-elf
++ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ ;;
++ crisv32 | crisv32-* | etraxfs*)
++ basic_machine=crisv32-axis
++ ;;
+ cris | cris-* | etrax*)
+ basic_machine=cris-axis
+ ;;
++ crx)
++ basic_machine=crx-unknown
++ os=-elf
++ ;;
+ da30 | da30-*)
+ basic_machine=m68k-da30
+ ;;
+@@ -454,6 +557,14 @@
+ basic_machine=m88k-motorola
+ os=-sysv3
+ ;;
++ dicos)
++ basic_machine=i686-pc
++ os=-dicos
++ ;;
++ djgpp)
++ basic_machine=i586-pc
++ os=-msdosdjgpp
++ ;;
+ dpx20 | dpx20-*)
+ basic_machine=rs6000-bull
+ os=-bosx
+@@ -604,6 +715,14 @@
+ basic_machine=m68k-isi
+ os=-sysv
+ ;;
++ m68knommu)
++ basic_machine=m68k-unknown
++ os=-linux
++ ;;
++ m68knommu-*)
++ basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
++ os=-linux
++ ;;
+ m88k-omron*)
+ basic_machine=m88k-omron
+ ;;
+@@ -615,10 +734,17 @@
+ basic_machine=ns32k-utek
+ os=-sysv
+ ;;
++ microblaze)
++ basic_machine=microblaze-xilinx
++ ;;
+ mingw32)
+ basic_machine=i386-pc
+ os=-mingw32
+ ;;
++ mingw32ce)
++ basic_machine=arm-unknown
++ os=-mingw32ce
++ ;;
+ miniframe)
+ basic_machine=m68000-convergent
+ ;;
+@@ -632,10 +758,6 @@
+ mips3*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+ ;;
+- mmix*)
+- basic_machine=mmix-knuth
+- os=-mmixware
+- ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ os=-coff
+@@ -648,6 +770,9 @@
+ basic_machine=i386-pc
+ os=-msdos
+ ;;
++ ms1-*)
++ basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
++ ;;
+ mvs)
+ basic_machine=i370-ibm
+ os=-mvs
+@@ -723,9 +848,12 @@
+ basic_machine=hppa1.1-oki
+ os=-proelf
+ ;;
+- or32 | or32-*)
++ openrisc | openrisc-*)
+ basic_machine=or32-unknown
+- os=-coff
++ ;;
++ os400)
++ basic_machine=powerpc-ibm
++ os=-os400
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+@@ -743,6 +871,14 @@
+ basic_machine=i860-intel
+ os=-osf
+ ;;
++ parisc)
++ basic_machine=hppa-unknown
++ os=-linux
++ ;;
++ parisc-*)
++ basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
++ os=-linux
++ ;;
+ pbd)
+ basic_machine=sparc-tti
+ ;;
+@@ -752,24 +888,36 @@
+ pc532 | pc532-*)
+ basic_machine=ns32k-pc532
+ ;;
++ pc98)
++ basic_machine=i386-pc
++ ;;
++ pc98-*)
++ basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
++ ;;
+ pentium | p5 | k5 | k6 | nexgen | viac3)
+ basic_machine=i586-pc
+ ;;
+ pentiumpro | p6 | 6x86 | athlon | athlon_*)
+ basic_machine=i686-pc
+ ;;
+- pentiumii | pentium2)
++ pentiumii | pentium2 | pentiumiii | pentium3)
+ basic_machine=i686-pc
+ ;;
++ pentium4)
++ basic_machine=i786-pc
++ ;;
+ pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+ basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumpro-* | p6-* | 6x86-* | athlon-*)
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+- pentiumii-* | pentium2-*)
++ pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
++ pentium4-*)
++ basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
++ ;;
+ pn)
+ basic_machine=pn-gould
+ ;;
+@@ -802,6 +950,10 @@
+ basic_machine=i586-unknown
+ os=-pw32
+ ;;
++ rdos)
++ basic_machine=i386-pc
++ os=-rdos
++ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ os=-coff
+@@ -828,6 +980,14 @@
+ sb1el)
+ basic_machine=mipsisa64sb1el-unknown
+ ;;
++ sde)
++ basic_machine=mipsisa32-sde
++ os=-elf
++ ;;
++ sei)
++ basic_machine=mips-sei
++ os=-seiux
++ ;;
+ sequent)
+ basic_machine=i386-sequent
+ ;;
+@@ -835,6 +995,12 @@
+ basic_machine=sh-hitachi
+ os=-hms
+ ;;
++ sh5el)
++ basic_machine=sh5le-unknown
++ ;;
++ sh64)
++ basic_machine=sh64-unknown
++ ;;
+ sparclite-wrs | simso-wrs)
+ basic_machine=sparclite-wrs
+ os=-vxworks
+@@ -901,10 +1067,6 @@
+ basic_machine=i386-sequent
+ os=-dynix
+ ;;
+- t3d)
+- basic_machine=alpha-cray
+- os=-unicos
+- ;;
+ t3e)
+ basic_machine=alphaev5-cray
+ os=-unicos
+@@ -913,14 +1075,27 @@
+ basic_machine=t90-cray
+ os=-unicos
+ ;;
+- tic4x | c4x*)
+- basic_machine=tic4x-unknown
+- os=-coff
+- ;;
+ tic54x | c54x*)
+ basic_machine=tic54x-unknown
+ os=-coff
+ ;;
++ tic55x | c55x*)
++ basic_machine=tic55x-unknown
++ os=-coff
++ ;;
++ tic6x | c6x*)
++ basic_machine=tic6x-unknown
++ os=-coff
++ ;;
++ # This must be matched before tile*.
++ tilegx*)
++ basic_machine=tilegx-unknown
++ os=-linux-gnu
++ ;;
++ tile*)
++ basic_machine=tile-unknown
++ os=-linux-gnu
++ ;;
+ tx39)
+ basic_machine=mipstx39-unknown
+ ;;
+@@ -934,6 +1109,10 @@
+ tower | tower-32)
+ basic_machine=m68k-ncr
+ ;;
++ tpf)
++ basic_machine=s390x-ibm
++ os=-tpf
++ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ os=-udi
+@@ -977,9 +1156,9 @@
+ basic_machine=hppa1.1-winbond
+ os=-proelf
+ ;;
+- windows32)
+- basic_machine=i386-pc
+- os=-windows32-msvcrt
++ xbox)
++ basic_machine=i686-pc
++ os=-mingw32
+ ;;
+ xps | xps100)
+ basic_machine=xps100-honeywell
+@@ -992,6 +1171,10 @@
+ basic_machine=z8k-unknown
+ os=-sim
+ ;;
++ z80-*-coff)
++ basic_machine=z80-unknown
++ os=-sim
++ ;;
+ none)
+ basic_machine=none-none
+ os=-none
+@@ -1011,6 +1194,9 @@
+ romp)
+ basic_machine=romp-ibm
+ ;;
++ mmix)
++ basic_machine=mmix-knuth
++ ;;
+ rs6000)
+ basic_machine=rs6000-ibm
+ ;;
+@@ -1027,13 +1213,10 @@
+ we32k)
+ basic_machine=we32k-att
+ ;;
+- sh3 | sh4 | sh3eb | sh4eb | sh[1234]le | sh3ele)
++ sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
+ basic_machine=sh-unknown
+ ;;
+- sh64)
+- basic_machine=sh64-unknown
+- ;;
+- sparc | sparcv9 | sparcv9b)
++ sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
+ basic_machine=sparc-sun
+ ;;
+ cydra)
+@@ -1080,6 +1263,9 @@
+ # First match some system type aliases
+ # that might get confused with valid system types.
+ # -solaris* is a basic system type, with this one exception.
++ -auroraux)
++ os=-auroraux
++ ;;
+ -solaris1 | -solaris1.*)
+ os=`echo $os | sed -e 's|solaris1|sunos4|'`
+ ;;
+@@ -1100,24 +1286,30 @@
+ # Each alternative MUST END IN A *, to match a version number.
+ # -sysv* is not here because it comes later, after sysvr4.
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
++ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
++ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
++ | -sym* | -kopensolaris* \
+ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+- | -aos* \
++ | -aos* | -aros* \
+ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+- | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
+- | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
++ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
++ | -openbsd* | -solidbsd* \
++ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
++ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+- | -chorusos* | -chorusrdb* \
++ | -chorusos* | -chorusrdb* | -cegcc* \
+ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+- | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
+- | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \
++ | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
++ | -uxpv* | -beos* | -mpeix* | -udk* \
++ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+- | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* | -powermax*)
++ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
++ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
++ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
+ # Remember, each alternative MUST END IN *, to match a version number.
+ ;;
+ -qnx*)
+@@ -1129,16 +1321,21 @@
+ ;;
+ esac
+ ;;
++ -nto-qnx*)
++ ;;
+ -nto*)
+- os=-nto-qnx
++ os=`echo $os | sed -e 's|nto|nto-qnx|'`
+ ;;
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+- | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
++ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
+ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ ;;
+ -mac*)
+ os=`echo $os | sed -e 's|mac|macos|'`
+ ;;
++ -linux-dietlibc)
++ os=-linux-dietlibc
++ ;;
+ -linux*)
+ os=`echo $os | sed -e 's|linux|linux-gnu|'`
+ ;;
+@@ -1151,6 +1348,9 @@
+ -opened*)
+ os=-openedition
+ ;;
++ -os400*)
++ os=-os400
++ ;;
+ -wince*)
+ os=-wince
+ ;;
+@@ -1172,6 +1372,9 @@
+ -atheos*)
+ os=-atheos
+ ;;
++ -syllable*)
++ os=-syllable
++ ;;
+ -386bsd)
+ os=-bsd
+ ;;
+@@ -1194,6 +1397,9 @@
+ -sinix*)
+ os=-sysv4
+ ;;
++ -tpf*)
++ os=-tpf
++ ;;
+ -triton*)
+ os=-sysv3
+ ;;
+@@ -1224,6 +1430,20 @@
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+ os=-mint
+ ;;
++ -aros*)
++ os=-aros
++ ;;
++ -kaos*)
++ os=-kaos
++ ;;
++ -zvmoe)
++ os=-zvmoe
++ ;;
++ -dicos*)
++ os=-dicos
++ ;;
++ -nacl*)
++ ;;
+ -none)
+ ;;
+ *)
+@@ -1246,6 +1466,12 @@
+ # system, and we'll never get to this point.
+
+ case $basic_machine in
++ score-*)
++ os=-elf
++ ;;
++ spu-*)
++ os=-elf
++ ;;
+ *-acorn)
+ os=-riscix1.2
+ ;;
+@@ -1255,6 +1481,9 @@
+ arm*-semi)
+ os=-aout
+ ;;
++ c4x-* | tic4x-*)
++ os=-coff
++ ;;
+ # This must come before the *-dec entry.
+ pdp10-*)
+ os=-tops20
+@@ -1280,6 +1509,9 @@
+ m68*-cisco)
+ os=-aout
+ ;;
++ mep-*)
++ os=-elf
++ ;;
+ mips*-cisco)
+ os=-elf
+ ;;
+@@ -1298,9 +1530,15 @@
+ *-be)
+ os=-beos
+ ;;
++ *-haiku)
++ os=-haiku
++ ;;
+ *-ibm)
+ os=-aix
+ ;;
++ *-knuth)
++ os=-mmixware
++ ;;
+ *-wec)
+ os=-proelf
+ ;;
+@@ -1403,7 +1641,7 @@
+ -sunos*)
+ vendor=sun
+ ;;
+- -aix*)
++ -cnk*|-aix*)
+ vendor=ibm
+ ;;
+ -beos*)
+@@ -1433,9 +1671,15 @@
+ -mvs* | -opened*)
+ vendor=ibm
+ ;;
++ -os400*)
++ vendor=ibm
++ ;;
+ -ptx*)
+ vendor=sequent
+ ;;
++ -tpf*)
++ vendor=ibm
++ ;;
+ -vxsim* | -vxworks* | -windiss*)
+ vendor=wrs
+ ;;
+@@ -1460,7 +1704,7 @@
+ esac
+
+ echo $basic_machine$os
+-exit 0
++exit
+
+ # Local variables:
+ # eval: (add-hook 'write-file-hooks 'time-stamp)
diff --git a/libtextcat/makefile.mk b/libtextcat/makefile.mk
new file mode 100644
index 000000000000..01a2a6eadc36
--- /dev/null
+++ b/libtextcat/makefile.mk
@@ -0,0 +1,85 @@
+#*************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# Copyright 2000, 2010 Oracle and/or its affiliates.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org. If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
+#
+#*************************************************************************
+
+PRJ=.
+# PRJ is "." here; PRJNAME and TARGET both carry the module name "libtextcat".
+PRJNAME=libtextcat
+TARGET=libtextcat
+
+# --- Settings -----------------------------------------------------
+# settings.mk is not part of this module (NOTE(review): presumably it supplies GUI, OS, BUILD_DIR etc. used below -- confirm).
+.INCLUDE : settings.mk
+
+# --- Files --------------------------------------------------------
+# Tarball name, its MD5 sum and its root directory, all for libtextcat 2.2 (NOTE(review): presumably consumed by set_ext.mk/tg_ext.mk -- confirm).
+TARFILE_NAME=libtextcat-2.2
+TARFILE_MD5=128cfc86ed5953e57fe0f5ae98b62c2e
+TARFILE_ROOTDIR=libtextcat-2.2
+# Patch file named after the same 2.2 release.
+PATCH_FILES=libtextcat-2.2.patch
+
+# Extra files under src/ (NOTE(review): presumably absent from the tarball and created by the patch -- confirm).
+ADDITIONAL_FILES= \
+ src$/utf8misc.h \
+ src$/utf8misc.c \
+ src$/win32_config.h \
+ src$/makefile.mk \
+ src$/libtextcat.map
+
+.IF "$(GUI)"=="UNX"
+#CONFIGURE_DIR=$(BUILD_DIR)
+# CONFIGURE_ACTION is relative to CONFIGURE_DIR, which this file leaves unset (the assignment above is commented out).
+# CONFIGURE_FLAGS uses dmake's $(eq,a,b yes no): CPPFLAGS="$(EXTRA_CDEFS)" when OS is MACOSX, otherwise $(NULL).
+CONFIGURE_ACTION=configure CFLAGS="$(ARCH_FLAGS) $(EXTRA_CFLAGS)"
+CONFIGURE_FLAGS=$(eq,$(OS),MACOSX CPPFLAGS="$(EXTRA_CDEFS)" $(NULL))
+# Build step is a plain "make" call (NOTE(review): hard-coded name; confirm it resolves to a suitable make on every UNX target).
+BUILD_ACTION=make
+# Wildcard picks up every libtextcat*$(DLLPOST) in src/.libs under BUILD_DIR (presumably libtool's output directory).
+OUT2LIB=$(BUILD_DIR)$/src$/.libs$/libtextcat*$(DLLPOST)
+
+.ENDIF # "$(GUI)"=="UNX"
+
+# WNT and OS2: no configure macros are set here; the build changes into src and runs dmake with $(MAKEMACROS).
+.IF "$(GUI)"=="WNT" || "$(GUI)"=="OS2"
+BUILD_ACTION=cd src && dmake $(MAKEMACROS)
+.ENDIF # "$(GUI)"=="WNT" || "$(GUI)"=="OS2"
+
+# Headers taken from src/ under BUILD_DIR (NOTE(review): presumably copied to the output include directory by tg_ext.mk -- confirm).
+OUT2INC= \
+ $(BUILD_DIR)$/src$/config.h \
+ $(BUILD_DIR)$/src$/common.h \
+ $(BUILD_DIR)$/src$/fingerprint.h \
+ $(BUILD_DIR)$/src$/textcat.h \
+ $(BUILD_DIR)$/src$/wg_mempool.h
+
+
+# --- Targets ------------------------------------------------------
+# Included after all macros above are set, in this order (NOTE(review): presumably these files consume those macros -- confirm).
+.INCLUDE : set_ext.mk
+.INCLUDE : target.mk
+.INCLUDE : tg_ext.mk
+
diff --git a/libtextcat/prj/build.lst b/libtextcat/prj/build.lst
new file mode 100644
index 000000000000..da155db3d291
--- /dev/null
+++ b/libtextcat/prj/build.lst
@@ -0,0 +1,3 @@
+ltc libtextcat : stlport soltools solenv NULL
+ltc libtextcat usr1 - all ltc_mkout NULL
+ltc libtextcat nmake - all ltc_libtextcat NULL
diff --git a/libtextcat/prj/d.lst b/libtextcat/prj/d.lst
new file mode 100644
index 000000000000..0e7f5636bdc0
--- /dev/null
+++ b/libtextcat/prj/d.lst
@@ -0,0 +1,12 @@
+
+..\%__SRC%\lib\lib*.* %_DEST%\lib%_EXT%\lib*.*
+..\%__SRC%\lib\ilib*.* %_DEST%\lib%_EXT%\ilib*.*
+..\%__SRC%\bin\l*.dll %_DEST%\bin%_EXT%\*.dll
+# headers -- every *.h from the libtextcat-2.2 src directory goes into a dedicated inc/libtextcat directory
+mkdir: %_DEST%\inc%_EXT%\libtextcat
+..\%__SRC%\misc\build\libtextcat-2.2\src\*.h %_DEST%\inc%_EXT%\libtextcat\*.h
+
+# data for language guessing -- fpdb.conf plus the *.lm files from data/new_fingerprints, delivered to the common pck directory
+..\data\new_fingerprints\fpdb.conf %COMMON_DEST%\pck%_EXT%\fpdb.conf
+..\data\new_fingerprints\lm\*.lm %COMMON_DEST%\pck%_EXT%\*.lm
+