diff options
author | Muthu Subramanian K <sumuthu@novell.com> | 2010-11-05 12:40:28 +0100 |
---|---|---|
committer | Jan Holesovsky <kendy@suse.cz> | 2010-11-05 13:08:15 +0100 |
commit | 962aaaced4539cc648cdd8236c36dadd4e77a871 (patch) | |
tree | 1e41d915805f17bf615fd27b1e8dda36ee6740f7 | |
parent | c85e98534260c57e3f10f2047a55410cd203b360 (diff) |
Help -> wiki converter.
-rwxr-xr-x | helpcontent2/to-wiki/convall.py | 38 | ||||
-rwxr-xr-x | helpcontent2/to-wiki/getalltitles.py | 114 | ||||
-rwxr-xr-x | helpcontent2/to-wiki/wikiconv2.py | 493 |
3 files changed, 645 insertions, 0 deletions
# ===========================================================================
# File: helpcontent2/to-wiki/convall.py
#       (new file, mode 100755; blob bb0483fed5 in the original commit)
#
# Runs wikiconv2.py once for every "<xhp file>;<wiki title>" row found in
# alltitles.csv and stores the result under wiki/<wiki title>.
# ===========================================================================
#!/usr/bin/env python

import os
import subprocess
import sys

# Rows of alltitles.csv, each one the result of line.split(";").  The leading
# empty row comes from the original script; main() skips every row that has
# fewer than two fields.
titles = [[]]


def loadallfiles(filename):
    """Append one ``line.split(";")`` row per line of *filename* to ``titles``."""
    # ``with`` closes the handle; the original never closed it.
    with open(filename, "r") as csvfile:
        for line in csvfile:
            titles.append(line.split(";"))


def main():
    """Convert every listed help file whose output does not exist yet.

    Returns the process exit status: 1 if at least one already existing
    output file was skipped, 0 otherwise.

    NOTE(review): the original printed "Warning: Skipping" and then called
    sys.exit(1); the whitespace-collapsed source does not show whether that
    exit was inside the loop.  Here the file is really skipped and the
    non-zero status is reported at the end -- confirm this is what callers
    expect.
    """
    loadallfiles("alltitles.csv")
    skipped = False
    for title in titles:
        if len(title) < 2:
            # Placeholder or malformed row: nothing to convert.
            continue
        infile = title[0].strip()
        outfile = "wiki/" + title[1].strip()
        # Only used in messages.  The conversion itself runs without a shell
        # so that file names containing spaces or shell metacharacters are
        # passed through unchanged.
        command = "python wikiconv2.py " + infile + " > " + outfile

        if os.path.isfile(outfile):
            print("Warning: Skipping: " + command)
            skipped = True
            continue

        print("Processing: " + infile)
        sys.stdout.flush()
        try:
            with open(outfile, "w") as out:
                status = subprocess.call(["python", "wikiconv2.py", infile],
                                         stdout=out)
        except (IOError, OSError) as err:
            # Conversion stays best-effort (the original ignored failures),
            # but the failure is now reported instead of silently dropped.
            sys.stderr.write("Failed: " + command + " (" + str(err) + ")\n")
            continue
        if status != 0:
            sys.stderr.write("Failed: " + command + "\n")

    if skipped:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())


# ===========================================================================
# File: helpcontent2/to-wiki/getalltitles.py
#       (new file, mode 100755; blob cb0527e88b in the original commit)
#
# Walks a directory of .xhp help files and prints one
# "<xhp file>;<module>/<unique wiki title>" line per file.
# ===========================================================================
#!/usr/bin/env python

import sys
import os
import xml.parsers.expat

# Parser state shared between the expat callbacks below.
title = ""        # text of the <title> element of the file being parsed
parsing = True    # False once the first <title> element has been closed
istitle = False   # True while the parser is inside a <title> element
alltitles = []    # every title printed so far (used to keep titles unique)


def is_present(title):
    """Return True if *title* (ignoring surrounding blanks) was already used."""
    wanted = title.strip()
    return any(existing.strip() == wanted for existing in alltitles)


def make_unique(title):
    """Return *title*, or *title* + "_<n>" with the smallest unused n >= 1."""
    candidate = title
    suffix = 0
    while is_present(candidate):
        suffix += 1
        candidate = title + "_%d" % (suffix)
    return candidate


# Applied in order by replace_text(); the quoted operators must come before
# the single-character rules that would otherwise rewrite them.
replace_text_list = [
    ["$[officename]", "LibreOffice"],
    ["%PRODUCTNAME", "LibreOffice"],
    ['"+"', "plus"],
    ['"*"', "star"],
    ['"-"', "minus"],
    ['"/"', "slash"],
    ['"^"', "cap"],
    [')', '_'],
    ['(', '_'],
    ['\\', '_'],
    ['/', '_']
    ]

modules_list = [
    "sbasic",
    "scalc",
    "schart",
    "sdraw",
    "shared",
    "simpress",
    "smath",
    "swriter"
    ]


def get_module(text):
    """Return the first entry of ``modules_list`` contained in *text*, else ""."""
    for module in modules_list:
        if module in text:
            return module
    return ""


def replace_text(text):
    """Apply every [old, new] pair of ``replace_text_list`` to *text* in order."""
    for old, new in replace_text_list:
        text = text.replace(old, new)
    return text


def start_element(name, attrs):
    """expat start handler: note that a <title> element has been entered."""
    global istitle
    if not parsing:
        return
    if name == 'title':
        istitle = True


def end_element(name):
    """expat end handler: stop collecting after the first </title>."""
    global parsing, istitle
    if not parsing:
        return
    if name == 'title':
        # The original assigned to the misspelled name "parsign", so
        # ``parsing`` was never cleared and later <title> elements were
        # still picked up.
        parsing = False
        istitle = False


def char_data(data):
    """expat text handler: collect the text found inside <title>."""
    global title
    if not istitle:
        return
    # Accumulate instead of overwrite: expat may deliver the text of one
    # element in several pieces.
    title = title + replace_text(data)


def parsexhp(filename):
    """Parse *filename*, record its unique wiki title and print the CSV row."""
    global parsing, istitle, title
    parsing = True
    istitle = False
    # Start from an empty title so a file without <title> does not inherit
    # the title of the previously parsed file.
    title = ""
    # Read bytes so expat can honour the encoding declared in the document.
    with open(filename, "rb") as xhp:
        buf = xhp.read()
    p = xml.parsers.expat.ParserCreate()
    p.buffer_text = True  # deliver contiguous text in a single callback
    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data
    p.Parse(buf)
    title = get_module(filename) + "/" + title
    title = title.replace(" ", "_")
    title = make_unique(title)
    alltitles.append(title)
    print(filename + ";" + title)


# Extension (without the dot) of the help files to process.
pattern = "xhp"


def main(argv):
    """Print a "<file>;<title>" row for every help file below argv[1]."""
    if len(argv) < 2:
        print("getalltitles.py <directory>")
        print("e.g. getalltitles.py helpcontent2/source/text/scalc")
        return 1
    for root, dirs, files in os.walk(argv[1]):
        for name in files:
            # Match the extension only; the original substring test also
            # accepted names that merely contain "xhp" somewhere.
            if name.endswith("." + pattern):
                parsexhp(root + "/" + name)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))


# ===========================================================================
# File: helpcontent2/to-wiki/wikiconv2.py
#       (new file, mode 100755; blob 11dfb97cd2 in the original commit)
#
# Converts one .xhp help file to wiki markup on standard output.  The expat
# events are forwarded to a tree of small handler objects; each object sets
# ``child_parsing`` while one of its children should receive the events.
# ===========================================================================
#!/usr/bin/env python

import sys
import xml.parsers.expat

# Prefix added to the file name of every <embed href="..."> target.
root = "helpcontent2/source/"

# Rows of alltitles.csv ([xhp path, wiki title]); filled by loadallfiles().
titles = [[]]

# [element name, wiki markup] emitted when the element starts / ends.
start_eles = [
    ["emph", "'''"]
    ]

end_eles = [
    ["emph", "'''"]
    ]

replace_text_list = [
    ["$[officename]", "LibreOffice"],
    ["%PRODUCTNAME", "LibreOffice"]
    ]


def _ascii(text):
    """Return *text* with every non-ASCII character replaced by '?'."""
    return text.encode('ascii', 'replace').decode('ascii')


def get_link_filename(link, name):
    """Return the wiki title registered for a link target.

    The lookup key is *link*, or *name* when *link* contains "http".  The
    first row of ``titles`` whose file path contains the key wins; *link* is
    returned unchanged when no row matches.
    """
    key = link
    if link.find("http") >= 0:
        key = name
    for row in titles:
        # Rows with fewer than two fields (e.g. the [] placeholder) cannot
        # provide a title.
        if len(row) > 1 and key in row[0]:
            return row[1].strip()
    return link


def replace_text(text):
    """Apply every [old, new] pair of ``replace_text_list`` to *text* in order."""
    for old, new in replace_text_list:
        text = text.replace(old, new)
    return text


def heading(level):
    """Return the wiki heading marker for *level*: *level* '=' characters."""
    return "=" * level


class cxml:
    """Handler for a whole help document, or for one embedded section of it.

    With a non-empty *sectionid* only the content of the <section> element
    carrying that id is converted.
    """

    def __init__(self, sectionid):
        self.filter_section = sectionid
        self.objects = []
        self.child_parsing = False
        # Output is enabled from the start only when nothing is filtered.
        self.parser_state = (sectionid == "")
        self.depth = 1

    def start_element(self, name, attrs):
        if name == 'section':
            if self.filter_section != "" and \
               attrs.get('id') == self.filter_section:
                self.parser_state = True
        if name == 'paragraph':
            # Paragraphs are created even while filtered out: the paragraph
            # then looks for a <variable> with the requested id itself.
            if not self.parser_state:
                para = cparagraph(attrs, self, self.filter_section, self.depth)
            else:
                para = cparagraph(attrs, self, '', self.depth)
            self.depth = para.depth
            self.child_parsing = True
            self.objects.append(para)
        if not self.parser_state:
            return
        if name == 'embed':
            link = attrs['href'].replace('"', '')
            # "file#section" -> file name and id of the section to embed.
            fname, _, section = link.partition("#")
            if fname.find("border") >= 0 or \
               fname.find("background") >= 0:
                print("Ignoring: " + fname)
            else:
                # The embedded file is parsed by a nested parser whose events
                # reach child_xml through head_obj.get_curobj().
                self.child_parsing = True
                child_xml = cxml(section)
                child_xml.depth = self.depth + 1
                self.objects.append(child_xml)
                parsexhp(root + fname)
                self.child_parsing = False

        if name == 'table':
            child = ctable(attrs, self)
            self.child_parsing = True
            self.objects.append(child)

    def end_element(self, name):
        if not self.parser_state:
            return
        if self.filter_section != "" and name == 'section':
            self.parser_state = False

    def char_data(self, data):
        pass

    def get_curobj(self):
        """Return the innermost object that should receive parser events."""
        if self.child_parsing:
            return self.objects[-1].get_curobj()
        return self

    def print_all(self):
        for obj in self.objects:
            obj.print_all()


class cimage:
    """Handler for an <image> element and its <alt> text."""

    def __init__(self, attrs, parent):
        self.src = attrs['src']
        # A size is only used when both dimensions are given.
        if 'width' in attrs and 'height' in attrs:
            self.width = attrs['width']
            self.height = attrs['height']
        else:
            self.width = self.height = ""
        self.align = 'left'
        self.alt = False
        self.alttext = ""
        self.parent = parent

    def start_element(self, name, attrs):
        if name == 'alt':
            self.alt = True

    def end_element(self, name):
        if name == 'alt':
            self.alt = False

        if name == 'image':
            self.parent.child_parsing = False

    def char_data(self, data):
        if self.alt:
            self.alttext = self.alttext + data

    def get_all(self):
        """Return the ``[[Image:...]]`` markup for this image."""
        wikitext = "[[Image:" + self.src + "|border|" + self.align + "|"
        if len(self.width):
            wikitext = wikitext + self.width + "x" + self.height + "|"
        wikitext = wikitext + self.alttext + "]]"
        return wikitext

    def print_all(self):
        print(self.get_all())

    def get_curobj(self):
        return self


class ctext:
    """A piece of literal wiki text (product placeholders already replaced)."""

    def __init__(self, text):
        self.wikitext = replace_text(text)

    def print_all(self):
        print(self.wikitext)


class ctabcell:
    """Handler for a <tablecell>; collects the paragraphs inside the cell."""

    def __init__(self, attrs, parent):
        # TODO: colspan rowspan
        self.objects = []
        self.child_parsing = False
        self.parent = parent
        self.header = False

    def start_element(self, name, attrs):
        if name == 'paragraph':
            # A missing role attribute is treated as "not a header".
            if attrs.get('role') == 'tablehead':
                self.header = True
            para = cparagraph(attrs, self, '', 0)
            self.child_parsing = True
            self.objects.append(para)

    def end_element(self, name):
        if name == 'tablecell':
            self.parent.child_parsing = False

    def char_data(self, data):
        pass

    def print_all(self):
        for obj in self.objects:
            obj.print_all()

    def get_all(self):
        """Return the concatenated wiki text of all paragraphs of the cell."""
        text = ""
        for obj in self.objects:
            text = text + obj.get_all()
        return text

    def get_curobj(self):
        if self.child_parsing:
            return self.objects[-1].get_curobj()
        return self


class ctable:
    """Handler for a <table>; renders it as a ``{| ... |}`` wiki table."""

    def __init__(self, attrs, parent):
        # TODO/Check: Might Require filtering too...
        self.tableid = attrs.get('id', 0)
        self.header = []      # cells of the header row, if any
        self.crow = []        # cells of the row currently being read
        # Starts with one empty row (as in the original), so get_all() emits
        # one row separator before the first real row.
        self.content = [[]]
        self.child_parsing = False
        self.child = None     # cell currently being read
        self.parent = parent

    def check_add_cell(self):
        """Move the pending cell, if any, into the current row."""
        if self.child:
            self.crow.append(self.child)
            self.child = None

    def check_add_row(self):
        """Store the current row as header or content and start a new one."""
        if len(self.crow):
            if self.crow[0].header:
                self.header = self.crow
            else:
                self.content.append(self.crow)
            self.crow = []

    def start_element(self, name, attrs):
        if name == 'tablecell':
            self.check_add_cell()
            self.child = ctabcell(attrs, self)
            self.child_parsing = True
        if name == 'tablerow':
            self.check_add_cell()
            self.check_add_row()

    def end_element(self, name):
        if name == 'table':
            # the following checks may be unnecessary
            self.check_add_cell()
            self.check_add_row()
            self.parent.child_parsing = False

    def char_data(self, data):
        pass

    def get_all(self):
        """Return the wiki markup of the whole table."""
        text = '{| border="1"'  # + ' align="left"'
        if len(self.header):
            # text = text + "\n|+ caption"
            text = text + "\n|-"
            for cell in self.header:
                text = text + '\n! scope="col" | ' + cell.get_all()
        for row in self.content:
            text = text + "\n|-"
            for cell in row:
                text = text + "\n| " + cell.get_all()
        text = text + "\n|}"
        return text

    def print_all(self):
        print(_ascii(self.get_all()))

    def get_curobj(self):
        if self.child_parsing:
            return self.child.get_curobj()
        return self


class clink:
    """Handler for a <link> element inside a paragraph."""

    def __init__(self, attrs, parent):
        self.link = attrs['href']
        if 'name' in attrs:
            self.lname = attrs['name']
        else:
            # Fall back to the last path component of the target.
            self.lname = self.link[self.link.rfind("/") + 1:]
        # Override lname
        self.lname = get_link_filename(self.link, self.lname)
        self.wikitext = ""
        self.parent = parent

    def start_element(self, name, attrs):
        pass

    def end_element(self, name):
        if name == "link":
            self.parent.child_parsing = False

    def char_data(self, data):
        self.wikitext = self.wikitext + data

    def get_all(self):
        """Return external ``[url text]`` or internal ``[[page|text]]`` markup."""
        if self.link.find("http") >= 0:
            text = "[" + self.link + " " + self.wikitext + "]"
        else:
            text = "[[" + self.lname + "|" + self.wikitext + "]]"
        if self.parent.heading:
            marker = heading(self.parent.depth)
            text = marker + " " + text + " " + marker
        text = replace_text(text)
        return text

    def print_all(self):
        print(self.get_all())

    def get_curobj(self):
        return self


# Not used yet - cparagraph itself handles it (as of now)
class cvariable:
    """Handler for a <variable> element (currently unused)."""

    def __init__(self, sectionid, parent, attrs=None):
        # ``attrs`` is an optional addition: the original body read an
        # undefined name ``attrs`` and raised NameError for every non-empty
        # sectionid.
        self.parser_state = True
        self.wikitext = ""
        # NOTE(review): a matching id switches output *off* here, whereas
        # cparagraph switches it *on* for a matching variable -- kept as
        # written, confirm the intent before this class is put to use.
        if sectionid != "" and attrs is not None and \
           attrs.get('id') == sectionid:
            self.parser_state = False
        self.parent = parent

    def start_element(self, name, attrs):
        pass

    def end_element(self, name):
        if name == 'variable':
            # Was the undefined bare name ``parent`` in the original.
            self.parent.child_parsing = False

    def print_all(self):
        print(self.wikitext)


class cparagraph:
    """Handler for a <paragraph>; collects text, links, images and children.

    With a non-empty *sectionid* only the content of the <variable> element
    carrying that id is converted.
    """

    def __init__(self, attrs, parent, sectionid, depth):
        self.child_parsing = False
        self.heading = (attrs.get('role') == "heading")

        # A missing or non-numeric level attribute counts as level 0.
        try:
            self.level = int(attrs['level'])
        except (KeyError, TypeError, ValueError):
            self.level = 0
        self.filter_section = sectionid
        self.parent = parent
        self.objects = []
        # The heading depth is the larger of the inherited depth and the
        # paragraph's own level.
        if depth > self.level:
            self.depth = depth
        else:
            self.depth = self.level
        self.wikitext = ""
        self.parser_state = (sectionid == "")

    def start_element(self, name, attrs):
        if name == 'variable':
            if attrs.get('id') == self.filter_section:
                self.parser_state = True
        if name == 'paragraph':
            if not self.parser_state:
                child = cparagraph(attrs, self, self.filter_section, self.depth + 1)
            else:
                child = cparagraph(attrs, self, "", self.depth + 1)
            self.child_parsing = True
            self.objects.append(child)

        if not self.parser_state:
            return
        if name == 'embed':
            # This shouldn't occur
            print("Warning: Skipped Embedded content!!!")
        if name == 'image':
            child = cimage(attrs, self)
            self.child_parsing = True
            self.objects.append(child)
        if name == 'link':
            child = clink(attrs, self)
            self.child_parsing = True
            self.objects.append(child)

        for ele, markup in start_eles:
            if ele == name:
                self.objects.append(ctext(markup))
                break

    def end_element(self, name):
        if name == 'paragraph':
            self.parent.child_parsing = False
        if not self.parser_state:
            return
        # The original compared against the misspelled 'varable', so output
        # was never switched off again after the requested variable ended.
        if self.filter_section != "" and name == 'variable':
            self.parser_state = False

        for ele, markup in end_eles:
            if ele == name:
                self.objects.append(ctext(markup))
                break

    def char_data(self, data):
        # Whitespace-only text is dropped.
        if not self.parser_state or not len(data.strip()):
            return
        if self.heading:
            marker = heading(self.depth)
            text = marker + " " + data + " " + marker
        else:
            text = data
        self.objects.append(ctext(text))

    def print_all(self):
        text = self.get_all()
        if len(text):
            print(_ascii(text))

    def get_all(self):
        """Return the wiki text of the paragraph and everything inside it.

        The text is rebuilt on every call, so calling this twice returns the
        same string (the original appended to ``self.wikitext`` each time).
        """
        text = ""
        for obj in self.objects:
            if isinstance(obj, ctext):
                text = text + obj.wikitext
            elif isinstance(obj, clink):
                text = text + obj.get_all() + " "
            else:
                # Images and nested paragraphs start on a line of their own.
                if len(text):
                    text = text + "\n"
                text = text + "\n" + obj.get_all()
        self.wikitext = text
        return text

    def get_curobj(self):
        if self.child_parsing:
            return self.objects[-1].get_curobj()
        return self


# Root of the handler tree; every parser event is routed through it.
head_obj = cxml("")


def start_element(name, attrs):
    """expat start handler: forward to the innermost active object."""
    head_obj.get_curobj().start_element(name, attrs)


def end_element(name):
    """expat end handler: forward to the innermost active object."""
    head_obj.get_curobj().end_element(name)


def char_data(data):
    """expat text handler: forward to the innermost active object."""
    head_obj.get_curobj().char_data(data)


def parsexhp(filename):
    """Parse *filename* and feed its events into the ``head_obj`` tree."""
    # Read bytes so expat can honour the encoding declared in the document;
    # ``with`` also closes the handle when parsing raises.
    with open(filename, "rb") as xhp:
        buf = xhp.read()
    p = xml.parsers.expat.ParserCreate()
    # Deliver contiguous text in one callback so that heading markup is not
    # wrapped around fragments of the same text.
    p.buffer_text = True
    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data
    p.Parse(buf)


def loadallfiles(filename):
    """Append one ``line.split(";")`` row per line of *filename* to ``titles``."""
    with open(filename, "r") as csvfile:
        for line in csvfile:
            titles.append(line.split(";"))


def main(argv):
    """Convert the help file named by argv[1] and print the wiki text."""
    if len(argv) < 2:
        print("wikiconv2.py <inputfile.xhp>")
        return 1
    loadallfiles("alltitles.csv")
    parsexhp(argv[1])
    head_obj.print_all()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))