mythes/mythes-1.2.0-vanilla-th-gen-idx.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

--- misc/mythes-1.2.3.orig/th_gen_idx.pl
+++ misc/build/mythes-1.2.3/th_gen_idx.pl
@@ -1,11 +1,25 @@
-#!/usr/bin/perl
-
-# perl program to take a thesaurus structured text data file 
-# and create the proper sorted index file (.idx)
+:
+eval 'exec perl -wS $0 ${1+"$@"}'
+    if 0;
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements. See the NOTICE file distributed
+#   with this work for additional information regarding copyright
+#   ownership. The ASF licenses this file to you under the Apache
+#   License, Version 2.0 (the "License"); you may not use this file
+#   except in compliance with the License. You may obtain a copy of
+#   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
 #
-# typically invoked as follows:
-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
 #
 
 sub by_entry {
     my ($aent, $aoff) = split('\|',$a);
@@ -13,6 +34,27 @@ sub by_entry {
     $aent cmp $bent;
 }
 
+#FIXME: someone may want "infile" or even parameter parsing
+sub get_outfile {
+	my $next_is_file = 0;
+	foreach ( @ARGV ) {
+		if ( $next_is_file ) {
+			return $_
+		}
+		if ( $_ eq "-o" ) {
+			$next_is_file = 1;
+		}
+	}
+	return "";
+}
+
+sub usage {
+	print "usage:\n";
+	print "$0 -o outfile < input\n";
+
+	exit 99;
+}
+
 # main routine
 my $ne = 0;       # number of entries in index
 my @tindex=();    # the index itself
@@ -24,6 +66,10 @@ my $nm=0;         # number of meaning fo
 my $meaning="";   # current meaning and synonyms
 my $p;            # misc uses
 my $encoding;     # encoding used by text file
+my $outfile = "";
+
+$outfile = get_outfile();
+usage() if ( $outfile eq "" );
 
 # top line of thesaurus provides encoding
 $encoding=<STDIN>;
@@ -51,9 +97,13 @@ while ($rec=<STDIN>){
 # now we have all of the information
 # so sort it and then output the encoding, count and index data
 @tindex = sort by_entry @tindex;
-print STDOUT "$encoding\n";
-print STDOUT "$ne\n";
+
+print "$outfile\n";
+open OUTFILE, ">$outfile" or die "ERROR: Can't open $outfile for writing!";
+print OUTFILE "$encoding\n";
+print OUTFILE "$ne\n";
 foreach $one (@tindex) {
-    print STDOUT "$one\n";
+    print OUTFILE "$one\n";
 }
+close OUTFILE;