wiki-to-help/convert.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222

#!/usr/bin/env python
#!/usr/bin/python -i
"""
Convert a MediaWiki XML dump to platform-specific help files.
Copyright 2011 Timo Richter

This program depends on:
mwlib
python
python-lxml
xsltproc
Microsoft HHC: http://go.microsoft.com/fwlink/?LinkId=14188


"""

import subprocess, tempfile, os, shutil, argparse

#import mwlib_mods # is being imported. see below
from hhc import HHC
from mw import MW
from metabook_translated import MetabookTranslated
from metabook_translated import LanguageSeparator
from executor import Executor

# Directory containing this script (symlinks resolved); bundled resources such
# as the stylesheet and the JSON templates are looked up relative to it.
scriptpath = os.path.split(os.path.realpath(__file__))[0]

class Main(object):
    """
    Command line entry point.

    Instantiating this class parses the command line, runs a Converter with
    the parsed options and terminates the process with the resulting exit
    status.
    """
    def parseArgs(self, argv=None):
        """
        Define the program parameters and parse them.

        @argv Optional list of argument strings. None (the default) lets
            argparse read the arguments from sys.argv.
        @return argparse.Namespace with the attributes startpage, imgPath,
            keepTmp, onlyEn, createChm, verbose, input and output
        """
        parser = argparse.ArgumentParser(description='Conversion from a mediawiki xml-dumpfile to helpfiles')
        parser.add_argument("--startpage", metavar="PATH", dest="startpage", default=None, type=str, help="Sets a HTML-file as the start page")
        parser.add_argument("--images", metavar="PATH", dest="imgPath", default=None, type=str, help="Uses images from PATH. PATH is a zipfile or a directory.")
        parser.add_argument("--keep", dest="keepTmp", default=False, action='store_true', help="Keeps temporary files in /tmp")
        parser.add_argument("--only-en", dest="onlyEn", action='store_true', default=False, help="Converts only English articles")
        parser.add_argument("--no-chm", dest="createChm", default=True, action='store_false', help="Avoids creation of a CHM file at the end")
        parser.add_argument("-v", dest="verbose", default=False, action='store_true', help="Verbose")
        parser.add_argument("input", type=str, help="XML input")
        parser.add_argument("output", type=str, help="Directory for output")

        return parser.parse_args(argv)

    def __init__(self):
        """
        Run the whole program: parse arguments, convert, exit.

        The process exits with status 0 if the converter returned a true
        value and with status 1 otherwise.
        """
        # sys.exit instead of the builtin exit(): the latter is a helper
        # installed by the site module for interactive sessions.
        import sys
        args = self.parseArgs()
        # Imported only after the arguments have been parsed, as before.
        # NOTE(review): presumably imported for its side effects on mwlib;
        # the name itself is never used here -- confirm.
        import mwlib_mods
        r = Converter(
            keepTmp=args.keepTmp,
            createChm=args.createChm,
            source=args.input,
            dest=args.output,
            startpage=args.startpage,
            onlyEn=args.onlyEn,
            imgPath=args.imgPath,
            verbose=args.verbose,
        )()
        sys.exit(int(not r))


class Converter(object):
    """
    Converts a MediaWiki XML dump to DocBook and (optionally) CHM help files.

    Create an instance with the conversion settings and call it to run the
    conversion. Intermediate files are written to a temporary directory
    (self.tmp), which is removed when the object is destroyed unless keepTmp
    is set.
    """
    verbose=False
    createChm = None # set in __init__: compile a CHM file after rendering
    keepTmp = None # set in __init__: keep the temporary directory
    #style=os.path.join(scriptpath,'xsl/htmlhelp/htmlhelp.xsl') # final
    style=os.path.join(scriptpath,'htmlhelp.xsl') # final
    title="Book" # fallback title, used if the dump does not name the site

    tmp=None
    # Class-level placeholder only: __init__ gives every instance its own
    # list, so files collected by one Converter never leak into another.
    includeFiles=[]

    def __init__(self,source,dest,onlyEn,imgPath,verbose,
        keepTmp=False,createChm=True,startpage=None):
        """
        Parameters are documented in Main.parseArgs()

        @source Path to the XML dump
        @dest Output directory
        @onlyEn Convert only the language "en"
        @imgPath Zip file or directory with images, or None
        @verbose Show more output of the called programs
        @keepTmp Do not delete the temporary directory on destruction
        @createChm Compile a CHM file for every converted language
        @startpage Path to an HTML file to use as start page, or None
        """
        self.createChm = createChm
        self.keepTmp = keepTmp
        # Per-instance list; appending to the class attribute would share the
        # entries between all Converter objects.
        self.includeFiles = []
        self.tmp = tempfile.mkdtemp()
        self.style = os.path.abspath(self.style)
        source = os.path.abspath(source)
        dest = os.path.abspath(dest)
        if startpage is not None:
            startpage = os.path.abspath(startpage)
        self.source = source
        self.dest = dest
        self.startpage = startpage
        self.onlyEn = onlyEn
        self.imgPath = imgPath
        self.verbose = verbose
        self.ex = Executor(showErr=verbose,showOutput=True,showCmd=verbose)
        self.hhc = HHC(showErr=True,showOutput=verbose,showCmd=verbose)
        # Falls back to the class default title if the dump has no sitename.
        self.title = self.getTitle(self.title)

    def getTitle(self,default=None):
        """
        If given, return TEXT from <siteinfo><sitename>TEXT</sitename></siteinfo>
            in xml file self.source.
        Otherwise return @default
        """
        import xml.dom.minidom
        print("Loading title")
        dom = xml.dom.minidom.parse(self.source)
        try:
            siteinfo = dom.getElementsByTagName("siteinfo")[0]
            sitename = siteinfo.getElementsByTagName("sitename")[0]
            name = sitename.childNodes[0].data
        except IndexError:
            # No <siteinfo>, no <sitename>, or an empty <sitename> element.
            return default
        else:
            return name

    def createDir(self,path):
        """
        Create the directory @path. An OSError (e.g. because the directory
        already exists) is ignored.
        """
        try:
            os.mkdir(path)
        except OSError:
            pass

    def setupImgPath(self):
        """
        If --images is not given, the path will be in the format "images/name.jpg".
        If --images is given a zipfile, it is being extracted to "images/".
        If --images is a directory, it is being copied to "images/".
        The filenames in images/ are being stored to self.includeFiles.
        """
        imgDest = "images" # puts images to output/imgDest/
        if not self.imgPath:
            self.imgPath = os.path.join(imgDest,"IMAGENAME")
            return
        extension = os.path.splitext(self.imgPath)[1].lower()
        imgTmp = os.path.join(self.tmp,imgDest)
        print("Copying images...")
        if extension == ".zip":
            self.ex("unzip","-q","-o","-j","-d",imgTmp,self.imgPath)
        else:
            shutil.copytree(self.imgPath,imgTmp)
        # NOTE: copytree raises if <dest>/images already exists.
        shutil.copytree(imgTmp, os.path.join(self.dest,imgDest) )
        # From here on imgPath is the pattern handed to the renderer.
        self.imgPath = os.path.join(imgDest,"IMAGENAME")
        # Save filenames for inclusion in chm
        for fname in os.listdir(imgTmp):
            fpath = os.path.join(imgDest,fname)
            self.includeFiles.append(fpath)

    def writeHhp(self):
        """
        Writes changes to the .hhp-file.
        self.includeFiles will be flushed to the hhp.
        """
        hhp = os.path.join(self.tmp,"htmlhelp.hhp")
        with open(hhp,"a") as f:
            f.write("\n".join(self.includeFiles))

    def __call__(self):
        """
        Create the environment for conversion and call convert()
        @return boolean Success
        """
        tmp = self.tmp
        self.createDir(self.dest)

        print("Working directory: "+tmp)

        self.setupImgPath()

        shutil.copy(os.path.join(scriptpath,"nfo.json"),tmp)
        metabook_template = os.path.join(scriptpath,"metabook.json")
        ls = LanguageSeparator.fromFileToFiles(metabook_template,self.source,tmp)
        MW.buildcdb(self.source,tmp)

        if self.onlyEn:
            return self.convert("en",ls["en"])

        for lang, metabook in ls.iteritems():
            if not self.convert(lang,metabook):
                return False
        # Every language was converted. Without this explicit result the
        # method returned None, which callers evaluate as a failure.
        return True

    def convert(self,lang,metabook):
        """
        Private.
        This function executes the programs for the conversion.
        @lang Language of book
        @metabook Path to metabook-json-file
        @return False if the xsltproc call reported a failure, otherwise True
        """
        print("Rendering language "+lang)
        tmp = self.tmp
        docbookfile = os.path.join(tmp,"%s.xml"%lang)
        chmDest = os.path.join(self.dest,lang+".chm")

        renderArgs = ("-L",lang,"-W","imagesrcresolver=%s"%self.imgPath,
            "--config=%s/wikiconf.txt"%(tmp),
            "-w","docbook","-o",docbookfile,"-m",metabook,"--title",self.title)
        MW.quietCall(MW.render,renderArgs,showErr=self.verbose)
        # The DocBook file is part of the output, too.
        shutil.copy(docbookfile,self.dest)
        print("Parsing docbook")
        xsltreturn = self.ex("/usr/bin/xsltproc","--nonet","--novalid","-o",tmp+'/',self.style,docbookfile)
        if not xsltreturn: return False
        if not self.setStartpage(self.startpage):
            # Not fatal: the generated start page is kept.
            print("Warning: start page not found: %s" % self.startpage)
        self.writeHhp()
        if self.createChm:
            print("Compiling chm...")
            self.hhc(tmp)
            shutil.copy(os.path.join(tmp,'htmlhelp.chm'),chmDest)
        return True

    def setStartpage(self,startpage):
        """
        Private.
        Copies @startpage to our tmp dir so that it will be used as the start page.
        @return False if @startpage doesnt exist, otherwise True.
        """
        if startpage is None: return True
        filename = "index.html"
        if not os.path.exists(startpage): return False
        target = os.path.join(self.tmp,filename)
        # Only remove a generated start page that is really there; an
        # unconditional os.remove() raises OSError if it is missing.
        if os.path.exists(target):
            os.remove(target)
        shutil.copy(startpage, target)
        return True

    def __del__(self):
        """
        Remove the temporary directory unless keepTmp is set.
        """
        # self.tmp is still None if __init__ failed before the directory
        # was created; cleanup errors must not escape from a destructor.
        if not self.keepTmp and self.tmp:
            shutil.rmtree(self.tmp, ignore_errors=True) # remove temp files

# Run the command line program only when executed as a script, not on import.
if __name__ == "__main__":
    Main()