summaryrefslogtreecommitdiff
path: root/wiki-to-help/convert.py
diff options
context:
space:
mode:
authorNorbert Thiebaud <nthiebaud@gmail.com>2012-09-01 09:51:27 -0500
committerNorbert Thiebaud <nthiebaud@gmail.com>2012-10-16 11:07:30 -0500
commit61173c1b58efa79c0ba6b08348d2796a249d0186 (patch)
tree00ebf544db18942e2a1ecfc5e5fa16931127d38f /wiki-to-help/convert.py
parent3dc2e7497f1798ae4ff6c5c8c562666bc10a393c (diff)
move help structure one directory up
Change-Id: Ie970e39fbb6795a92d9fdd13510409d7dcd071bc
Diffstat (limited to 'wiki-to-help/convert.py')
-rwxr-xr-xwiki-to-help/convert.py222
1 files changed, 222 insertions, 0 deletions
diff --git a/wiki-to-help/convert.py b/wiki-to-help/convert.py
new file mode 100755
index 0000000000..323a834d19
--- /dev/null
+++ b/wiki-to-help/convert.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+#!/usr/bin/python -i
+"""
+Convert an XML-Dump to platformspecific help files.
+Copyright 2011 Timo Richter
+
+This program depends on:
+mwlib
+python
+python-lxml
+xsltproc
+Microsoft HHC: http://go.microsoft.com/fwlink/?LinkId=14188
+
+
+"""
+
+import subprocess, tempfile, os, shutil, argparse
+
+#import mwlib_mods # is being imported. see below
+from hhc import HHC
+from mw import MW
+from metabook_translated import MetabookTranslated
+from metabook_translated import LanguageSeparator
+from executor import Executor
+
+scriptpath=os.path.dirname(os.path.realpath(__file__) )
+
+class Main(object):
+ ''' Defines program parameters and returns them as a dictionary '''
+ def parseArgs(self):
+ parser = argparse.ArgumentParser(description='Conversion from a mediawiki xml-dumpfile to helpfiles')
+ parser.add_argument("--startpage", metavar="PATH", dest="startpage", default=None, type=str, help="Sets a HTML-file as the start page")
+ parser.add_argument("--images", metavar="PATH", dest="imgPath", default=None, type=str, help="Uses images from PATH. PATH is a zipfile or a directory.")
+ parser.add_argument("--keep", dest="keepTmp", default=False, action='store_true', help="Keeps temporary files in /tmp")
+ parser.add_argument("--only-en", dest="onlyEn", action='store_true', default=False, help="Converts only English articles")
+ parser.add_argument("--no-chm", dest="createChm", default=True, action='store_false', help="Avoids creation of a CHM file at the end")
+ parser.add_argument("-v", dest="verbose", default=False, action='store_true', help="Verbose")
+ parser.add_argument("input", type=str, help="XML input")
+ parser.add_argument("output", type=str, help="Directory for output")
+
+ return parser.parse_args()
+
+ def __init__(self):
+ args = self.parseArgs()
+ import mwlib_mods
+ r = Converter(
+ keepTmp=args.keepTmp,
+ createChm=args.createChm,
+ source=args.input,
+ dest=args.output,
+ startpage=args.startpage,
+ onlyEn=args.onlyEn,
+ imgPath=args.imgPath,
+ verbose=args.verbose,
+ )()
+ exit(int(not r))
+
+
+class Converter(object):
+ verbose=False
+ createChm = None #
+ keepTmp = None #
+ #style=os.path.join(scriptpath,'xsl/htmlhelp/htmlhelp.xsl') # final
+ style=os.path.join(scriptpath,'htmlhelp.xsl') # final
+ title="Book" # final
+
+ tmp=None
+ includeFiles=[]
+
+ def __init__(self,source,dest,onlyEn,imgPath,verbose,
+ keepTmp=False,createChm=True,startpage=None):
+ """
+ Parameters are documented in Main.parseArgs()
+ """
+ self.createChm = createChm
+ self.keepTmp=keepTmp
+ self.tmp = tempfile.mkdtemp()
+ self.style = os.path.abspath(self.style)
+ source = os.path.abspath(source)
+ dest = os.path.abspath(dest)
+ if startpage is not None:
+ startpage = os.path.abspath(startpage)
+ self.source=source
+ self.dest=dest
+ self.startpage=startpage
+ self.onlyEn = onlyEn
+ self.imgPath = imgPath
+ self.verbose = verbose
+ self.ex = Executor(showErr=verbose,showOutput=True,showCmd=verbose)
+ self.hhc = HHC(showErr=True,showOutput=verbose,showCmd=verbose)
+ self.title = self.getTitle(self.title)
+
+ def getTitle(self,default=None):
+ """
+ If given, return TEXT from <siteinfo><sitename>TEXT</sitename></siteinfo>
+ in xml file self.source.
+ Otherwise return @default
+ """
+ import xml.dom.minidom
+ print "Loading title"
+ dom = xml.dom.minidom.parse(self.source)
+ try:
+ siteinfo = dom.getElementsByTagName("siteinfo")[0]
+ sitename = siteinfo.getElementsByTagName("sitename")[0]
+ name = sitename.childNodes[0].data
+ except IndexError:
+ return default
+ else:
+ return name
+
+ def createDir(self,path):
+ try:
+ os.mkdir(path)
+ except OSError:
+ pass
+
+ def setupImgPath(self):
+ """
+ If --images is not given, the path will be in the format "images/name.jpg".
+ If --images is given a zipfile, it is being extracted to "images/".
+ If --images is a directory, it is being copied to "images/".
+ The filenames in images/ are being stored to self.includeFiles.
+ """
+ imgDest = "images" # puts images to output/imgDest/
+ if not self.imgPath:
+ self.imgPath = os.path.join(imgDest,"IMAGENAME")
+ return
+ extension = os.path.splitext(self.imgPath)[1].lower()
+ imgTmp = os.path.join(self.tmp,imgDest)
+ print "Copying images..."
+ if extension == ".zip":
+ self.ex("unzip","-q","-o","-j","-d",imgTmp,self.imgPath)
+ else:
+ shutil.copytree(self.imgPath,imgTmp)
+ shutil.copytree(imgTmp, os.path.join(self.dest,imgDest) )
+ self.imgPath = os.path.join(imgDest,"IMAGENAME")
+ # Save filenames for inclusion in chm
+ for fname in os.listdir(imgTmp):
+ fpath = os.path.join(imgDest,fname)
+ self.includeFiles.append(fpath)
+
+ def writeHhp(self):
+ """
+ Writes changes to the .hhp-file.
+ self.includeFiles will be flushed to the hhp.
+ """
+ hhp=os.path.join(self.tmp,"htmlhelp.hhp")
+ with open(hhp,"a") as f:
+ f.write("\n".join(self.includeFiles))
+
+ def __call__(self):
+ """
+ Create the environment for conversion and call convert()
+ @return boolean Success
+ """
+ tmp = self.tmp
+ self.createDir(self.dest)
+
+ print "Working directory: "+tmp
+
+ self.setupImgPath()
+
+ shutil.copy(os.path.join(scriptpath,"nfo.json"),tmp)
+ metabook_template = os.path.join(scriptpath,"metabook.json")
+ ls = LanguageSeparator.fromFileToFiles(metabook_template,self.source,tmp)
+ MW.buildcdb(self.source,tmp)
+
+ if self.onlyEn:
+ return self.convert("en",ls["en"])
+ else:
+
+ for lang, metabook in ls.iteritems():
+ if not self.convert(lang,metabook): return False
+
+ def convert(self,lang,metabook):
+ """
+ Private.
+ This function executes the programs for the conversion.
+ @lang Language of book
+ @metabook Path to metabook-json-file
+ """
+ print "Rendering language "+lang
+ tmp = self.tmp
+ docbookfile = os.path.join(tmp,"%s.xml"%lang)
+ chmDest = os.path.join(self.dest,lang+".chm")
+
+ renderArgs = ("-L",lang,"-W","imagesrcresolver=%s"%self.imgPath,
+ "--config=%s/wikiconf.txt"%(tmp),
+ "-w","docbook","-o",docbookfile,"-m",metabook,"--title",self.title)
+ MW.quietCall(MW.render,renderArgs,showErr=self.verbose)
+ shutil.copy(docbookfile,self.dest)
+ print "Parsing docbook"
+ xsltreturn = self.ex("/usr/bin/xsltproc","--nonet","--novalid","-o",tmp+'/',self.style,docbookfile)
+ if not xsltreturn: return False
+ self.setStartpage(self.startpage)
+ self.writeHhp()
+ if self.createChm:
+ print("Compiling chm...")
+ self.hhc(tmp)
+ shutil.copy(os.path.join(tmp,'htmlhelp.chm'),chmDest)
+ return True
+
+ def setStartpage(self,startpage):
+ """
+ Private.
+ Copies @startpage to our tmp dir so that it will be used as the start page.
+ @return False if @startpage doesnt exist, otherwise True.
+ """
+ if startpage is None: return True
+ filename="index.html"
+ if not os.path.exists(startpage): return False
+ os.remove(os.path.join(self.tmp,filename))
+ shutil.copy(startpage, os.path.join(self.tmp,filename))
+ return True
+
+ def __del__(self):
+ if not self.keepTmp:
+ shutil.rmtree(self.tmp) # remove temp files
+
+if __name__ == '__main__':
+ Main()
+