summary refs log tree commit diff
path: root/helpcontent2/to-wiki/wikiconv2.py
diff options
context:
space:
mode:
author Norbert Thiebaud <nthiebaud@gmail.com> 2012-09-01 09:51:27 -0500
committer Norbert Thiebaud <nthiebaud@gmail.com> 2012-10-16 11:07:30 -0500
commit 61173c1b58efa79c0ba6b08348d2796a249d0186 (patch)
tree 00ebf544db18942e2a1ecfc5e5fa16931127d38f /helpcontent2/to-wiki/wikiconv2.py
parent 3dc2e7497f1798ae4ff6c5c8c562666bc10a393c (diff)
move help structure one directory up
Change-Id: Ie970e39fbb6795a92d9fdd13510409d7dcd071bc
Diffstat (limited to 'helpcontent2/to-wiki/wikiconv2.py')
-rwxr-xr-x helpcontent2/to-wiki/wikiconv2.py 1383
1 files changed, 0 insertions, 1383 deletions
diff --git a/helpcontent2/to-wiki/wikiconv2.py b/helpcontent2/to-wiki/wikiconv2.py
deleted file mode 100755
index 762484d225..0000000000
--- a/helpcontent2/to-wiki/wikiconv2.py
+++ /dev/null
@@ -1,1383 +0,0 @@
-#!/usr/bin/env python
-
-import os, sys, thread, threading, time
-import xml.parsers.expat
-import codecs
-from threading import Thread
-
# NOTE(review): presumably the directory holding the help sources; the code
# that reads this name is not visible in this part of the file -- confirm.
root = "source/"
# NOTE(review): presumably an upper bound on worker threads -- confirm.
max_threads = 25

# get_link_filename() searches entry[0] of each item and returns entry[1]
titles = []

# map of id -> localized text, (re)filled by load_localization_data()
localization_data = {}

# pages that will be redirections to the pages with nice names
redirects = []

# images that we will up-load later
images = set()
-
-# various types of paragraphs
# various types of paragraphs
#
# One row per paragraph role:
#   (role, markup put before the text, markup put after the text,
#    whether the text lands inside a wiki template and so needs its '='
#    signs escaped)
_paragraph_role_rows = [
    ('bascode', '', '\n', False),
    ('code', '<code>', '</code>\n\n', False),
    ('codeintip', '<code>', '</code>\n\n', False),
    # start/end must be empty to be able to strip empty <emph/>
    ('emph', '', '', False),
    ('example', '<code>', '</code>\n\n', False),
    ('heading1', '= ', ' =\n\n', False),
    ('heading2', '== ', ' ==\n\n', False),
    ('heading3', '=== ', ' ===\n\n', False),
    ('heading4', '==== ', ' ====\n\n', False),
    ('heading5', '===== ', ' =====\n\n', False),
    ('heading6', '====== ', ' ======\n\n', False),
    # head1/head2: used only in one file, probably in error?
    ('head1', '= ', ' =\n\n', False),
    ('head2', '== ', ' ==\n\n', False),
    ('listitem', '', '', False),
    ('note', '{{Note|', '}}\n\n', True),
    # special paragraph for Variable, CaseInline, etc.
    ('null', '', '', False),
    ('paragraph', '', '\n\n', False),
    # related/relatedtopics: used only in one file, probably in error?
    ('related', '', '\n\n', False),
    ('relatedtopics', '', '\n\n', False),
    ('sup', '', '', False),
    ('tablecontent', '| | ', '\n', False),
    ('tablecontentcode', '| | <code>', '</code>\n', False),
    ('tablehead', '! scope="col" | ', '\n', False),
    ('tablenextpara', '\n', '\n', False),
    ('tablenextparacode', '\n<code>', '</code>\n', False),
    ('tip', '{{Tip|', '}}\n\n', True),
    ('variable', '', '', False),
    ('warning', '{{Warning|', '}}\n\n', True),
]

replace_paragraph_role = {
    'start': dict((row[0], row[1]) for row in _paragraph_role_rows),
    'end': dict((row[0], row[2]) for row in _paragraph_role_rows),
    'templ': dict((row[0], row[3]) for row in _paragraph_role_rows),
}
-
# section id -> name of the wiki template the section body is wrapped in
# (see Section.get_all)
section_id_mapping = {
    'relatedtopics': 'RelatedTopics',
}
-
-# text snippets that we need to convert
# [snippet to look for, wiki replacement] pairs, applied in this order by
# replace_text()
replace_text_list = [
    ["$[officename]", "{{ProductName}}"],
    ["%PRODUCTNAME", "{{ProductName}}"],
    ["$PRODUCTNAME", "{{ProductName}}"],
]
-
def get_link_filename(link, name):
    """Resolve a help link to the name of the wiki page it points to.

    link -- the raw href; surrounding whitespace is ignored
    name -- text that is searched for instead of the href when the href
            starts with 'http'

    Returns a (page, fragment) tuple.  The page is the stripped second
    field of the first entry in the global 'titles' whose first field
    contains the search text; the fragment is the '#...' part of the href
    ('' when there is none, or for 'http' links).  When no entry matches,
    (link, '') is returned.
    """
    text = link.strip()
    fragment = ''
    if text.startswith('http'):
        text = name
    else:
        # split off the '#anchor' part, keeping the '#' in the fragment
        text, hash_mark, anchor = text.partition('#')
        fragment = hash_mark + anchor

    for title in titles:
        try:
            if title[0].find(text) >= 0:
                return (title[1].strip(), fragment)
        except (IndexError, AttributeError, TypeError, UnicodeError):
            # malformed entry (too short, not text, or undecodable); skip
            # it rather than abort the whole lookup
            pass
    return (link, '')
-
def replace_text(text):
    """Return 'text' with every snippet from replace_text_list converted.

    The pairs are applied in list order; each one replaces all occurrences
    of its first item with its second item.
    """
    for pair in replace_text_list:
        old, new = pair[0], pair[1]
        # str.replace() leaves the text alone when 'old' does not occur
        text = text.replace(old, new)
    return text
-
-# modify the text so that in templates like {{Name|something}}, the 'something'
-# does not look like template params
def escape_equals_sign(text):
    """Escape the top-level '=' characters of 'text' as '&#61;'.

    Used for text placed inside templates like {{Name|something}}, so that
    the 'something' does not look like template params.  An '=' nested
    inside {...}, [...] or <...> is left untouched.  Unbalanced closing
    brackets are ignored: the nesting depth never drops below zero.
    """
    openers = '{[<'
    closers = '}]>'
    depth = 0
    pieces = []     # collected and joined once, instead of repeated concatenation
    for ch in text:
        if ch == '=' and depth == 0:
            pieces.append('&#61;')
            continue
        pieces.append(ch)
        if ch in openers:
            depth += 1
        elif ch in closers and depth > 0:
            depth -= 1

    return ''.join(pieces)
-
def load_localization_data(sdf_file):
    """(Re)load the global localization_data map from a UTF-8 .sdf file.

    Each usable line is tab separated.  Field 1 (the file path, with
    backslashes turned into '/') and field 4 form the key 'path#id';
    field 10 is the stored text.  Blank lines and lines starting with '#'
    are skipped; lines with too few fields are reported on stderr and
    ignored.

    Returns False when the file cannot be opened, True otherwise.
    """
    global localization_data
    localization_data = {}
    try:
        sdf = codecs.open(sdf_file, "r", "utf-8")
    except (IOError, OSError):
        sys.stderr.write('Error: Cannot open .sdf file "%s"\n'% sdf_file)
        return False

    try:
        for line in sdf:
            line = line.strip()
            # a blank line used to crash on line[0]
            if not line or line[0] == '#':
                continue
            spl = line.split("\t")

            try:
                # the form of the key is like
                # source/text/shared/explorer/database/02010100.xhp#hd_id3149233
                # otherwise we are getting duplicates
                key = '%s#%s'% (spl[1].replace('\\', '/'), spl[4])
                localization_data[key] = spl[10]
            except IndexError:
                sys.stderr.write('Warning: Ignored line "%s"\n'% line.encode('utf-8'))
    finally:
        # close the file even if reading or decoding fails part-way
        sdf.close()

    return True
-
# character -> (text when not preceded by a backslash,
#               text when preceded by a backslash)
_unescape_map = {'<': ('&lt;', '<'),
                 '>': ('&gt;', '>'),
                 '&': ('&amp;', '&'),
                 '"': ('"', '"')}

def unescape(str):
    """Undo the backslash escaping of a localized string.

    A backslash escapes the following character: '\\\\' yields one
    backslash, and an escaped '<', '>' or '&' is emitted literally.  The
    same characters without a preceding backslash become '&lt;', '&gt;'
    and '&amp;'.  Before any other character the backslash is simply
    dropped, as is a backslash at the very end of the string.
    """
    pieces = []
    escape = False
    for c in str:
        if c == '\\':
            if escape:
                pieces.append('\\')
            # a lone backslash only arms the escape for the next character
            escape = not escape
            continue

        variants = _unescape_map.get(c)
        if variants is None:
            pieces.append(c)
        elif escape:
            pieces.append(variants[1])
        else:
            pieces.append(variants[0])
        escape = False

    return ''.join(pieces)
-
def get_localized_text(filename, id):
    """Return the unescaped localized text stored under 'filename#id'.

    Returns '' when localization_data has no entry for that key.
    """
    try:
        raw = localization_data['%s#%s'% (filename, id)]
    except KeyError:
        # nothing localized for this paragraph
        return ''

    return unescape(raw)
-
def href_to_fname_id(href):
    """Split an href of the form 'file#id' into [file, id].

    Double quotes are removed first.  The split happens at the first '#';
    when there is none, a warning goes to stderr and the id is ''.
    """
    link = href.replace('"', '')
    fname, hash_mark, ref_id = link.partition('#')
    if not hash_mark:
        # terminate the message with a newline, like the other warnings
        sys.stderr.write('Reference without a "#" in "%s".\n'% link)

    return [fname, ref_id]
-
-# Base class for all the elements
-#
-# self.name - name of the element, to drop the self.child_parsing flag
-# self.objects - collects the child objects that are constructed during
-# parsing of the child elements
-# self.child_parsing - flag whether we are parsing a child, or the object
-# itself
-# self.parent - parent object
class ElementBase:
    """Base class for all the elements.

    Attributes:
      name          - name of the element; its end tag drops the parent's
                      child_parsing flag
      objects       - the child objects constructed while parsing the
                      child elements
      child_parsing - whether we are parsing a child, or the object itself
      parent        - parent object
    """

    def __init__(self, name, parent):
        self.name = name
        self.objects = []
        self.child_parsing = False
        self.parent = parent

    def start_element(self, parser, name, attrs):
        """Handle an opening tag; ignored by default."""
        pass

    def end_element(self, parser, name):
        """On our own closing tag, hand parsing back to the parent."""
        if name == self.name:
            self.parent.child_parsing = False

    def char_data(self, parser, data):
        """Handle character data; ignored by default."""
        pass

    def get_curobj(self):
        """Return the innermost object that is currently being parsed."""
        if self.child_parsing:
            return self.objects[-1].get_curobj()
        return self

    def parse_child(self, child):
        """Start parsing a child element."""
        self.child_parsing = True
        self.objects.append(child)

    def get_all(self):
        """Construct the wiki representation of this object.

        Here that is only the concatenated text of the objects held in
        self.objects.
        """
        return u''.join([child.get_all() for child in self.objects])

    def get_variable(self, id):
        """Find the variable 'id' among the children (for embedding).

        Returns the first non-None answer of a child, or None.
        """
        for child in self.objects:
            if child is not None:
                var = child.get_variable(id)
                if var is not None:
                    return var
        return None

    def embed_href(self, parent_parser, fname, id):
        """Embed part of another file into the current structure.

        Parses 'source/' + fname with a new XhpParser and appends the
        object found for 'id' to self.objects.
        """
        # parse another xhp
        parser = XhpParser('source/' + fname, False, \
                parent_parser.current_app, parent_parser.wiki_page_name, \
                parent_parser.lang)
        var = parser.get_variable(id)

        if var is not None:
            # an embedded variable is rendered as an ordinary paragraph;
            # not every embeddable object has a role at all
            if getattr(var, 'role', None) == 'variable':
                var.role = 'paragraph'
            self.objects.append(var)
        elif parser.follow_embed:
            # NOTE(review): 'parser' is created with follow_embed=False
            # above, so this warning may be unreachable -- confirm whether
            # parent_parser.follow_embed was intended.
            sys.stderr.write('Cannot find reference "#%s" in "%s".\n'% \
                    (id, fname))

    def unhandled_element(self, parser, name):
        """Warn on stderr about an element this class does not handle."""
        sys.stderr.write('Warning: Unhandled element "%s" in "%s" (%s)\n'% \
                (name, self.name, parser.filename))
-
-# Base class for trivial elements that operate on char_data
-#
-# Like <comment>, or <title>
class TextElementBase(ElementBase):
    """Trivial element that only collects character data.

    The collected text is rendered between the fixed 'start' and 'end'
    markup; with 'templ' set, its '=' signs are escaped as well.
    """

    def __init__(self, attrs, parent, element_name, start, end, templ):
        ElementBase.__init__(self, element_name, parent)
        self.text = u''
        self.start, self.end = start, end
        self.templ = templ

    def char_data(self, parser, data):
        self.text += data

    def get_all(self):
        body = replace_text(self.text)
        if self.templ:
            body = escape_equals_sign(body)
        return self.start + body + self.end
-
class XhpFile(ElementBase):
    """Top-level object of a parsed file; it has no name and no parent."""

    def __init__(self):
        ElementBase.__init__(self, None, None)

    def start_element(self, parser, name, attrs):
        if name in ('body', 'helpdocument'):
            # ignored, we flatten the structure
            return

        if name in ('embed', 'embedvar'):
            if parser.follow_embed:
                (fname, ref_id) = href_to_fname_id(attrs['href'])
                self.embed_href(parser, fname, ref_id)
            return

        if name == 'paragraph':
            parser.parse_paragraph(attrs, self)
            return

        # everything else either becomes a child object, or is unknown
        if name == 'bookmark':
            child = Bookmark(attrs, self, 'div', parser)
        elif name == 'comment':
            child = Comment(attrs, self)
        elif name == 'list':
            child = List(attrs, self)
        elif name == 'meta':
            child = Meta(attrs, self)
        elif name == 'section':
            child = Section(attrs, self)
        elif name == 'sort':
            child = Sort(attrs, self)
        elif name == 'switch':
            child = Switch(attrs, self, parser.embedding_app)
        elif name == 'table':
            child = Table(attrs, self)
        elif name == 'bascode':
            child = BasicCode(attrs, self)
        else:
            self.unhandled_element(parser, name)
            return

        self.parse_child(child)
-
class Bookmark(ElementBase):
    """An anchor inside the page, possibly with a redirect page for it.

    'type' selects the markup of the anchor: 'div' or 'span'.
    """

    def __init__(self, attrs, parent, type, parser):
        ElementBase.__init__(self, 'bookmark', parent)

        self.type = type
        self.id = attrs['id']

        self.app = ''
        self.redirect = ''
        self.target = ''
        self.authoritative = False

        # let's construct the name of the redirect, so that we can point
        # to the wikihelp directly from the LO code; wiki then takes care of
        # the correct redirect
        branch = attrs['branch']
        if branch.startswith('hid/') and \
                (parser.current_app_raw != '' or parser.follow_embed):
            self.redirect = branch[len('hid/'):]
            self.app = parser.current_app_raw
            self.target = parser.wiki_page_name
            self.authoritative = parser.follow_embed

    def get_all(self):
        # first of all, we need to create a redirect page for this one
        if self.redirect != '' and self.target != '':
            entry = [self.app, self.redirect,
                     '%s#%s'% (self.target, self.id),
                     self.authoritative]
            redirects.append(entry)

        # then we also have to setup ID inside the page
        if self.type == 'div':
            return '<div id="%s"></div>\n'% self.id
        if self.type == 'span':
            return '<span id="%s"></span>'% self.id

        sys.stderr.write('Unknown bookmark type "%s"'% self.type)
        return ''
-
class Image(ElementBase):
    """An image; the character data of its <alt> child is the caption."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'image', parent)
        self.src = attrs['src']
        self.align = 'left'
        self.alt = False        # True while inside <alt>
        self.alttext = ""

    def start_element(self, parser, name, attrs):
        if name != 'alt':
            self.unhandled_element(parser, name)
            return
        self.alt = True

    def end_element(self, parser, name):
        ElementBase.end_element(self, parser, name)

        if name == 'alt':
            self.alt = False

    def char_data(self, parser, data):
        if not self.alt:
            return
        self.alttext += data

    def get_all(self):
        # remember the image, it is to be up-loaded later
        images.add(self.src)

        # only the file name is used on the wiki, not the path
        name = self.src.rsplit('/', 1)[-1]
        return "[[Image:" + name + "|border|" + self.align + "|" + \
                self.alttext + "]]"

    def get_curobj(self):
        # child elements are handled here, no child objects are created
        return self
-
class Br(TextElementBase):
    """A line break; rendered as '<br/>' followed by any collected text."""

    def __init__(self, attrs, parent):
        TextElementBase.__init__(
            self, attrs, parent, 'br', '<br/>', '', False)
-
class Comment(TextElementBase):
    """A comment; its text is rendered between '<!-- ' and ' -->'."""

    def __init__(self, attrs, parent):
        TextElementBase.__init__(
            self, attrs, parent, 'comment', '<!-- ', ' -->', False)
-
class HelpIdMissing(TextElementBase):
    """<help-id-missing>; rendered as the {{MissingHelpId}} template."""

    def __init__(self, attrs, parent):
        TextElementBase.__init__(
            self, attrs, parent, 'help-id-missing',
            '{{MissingHelpId}}', '', False)
-
class Text:
    """A piece of plain text; the snippets are converted on creation."""

    def __init__(self, text):
        converted = replace_text(text)
        self.wikitext = converted

    def get_all(self):
        return self.wikitext

    def get_variable(self, id):
        # plain text never contains a variable
        return None
-
class TableCell(ElementBase):
    """One cell of a table row.

    cellHasChildElement records whether any child element was seen; a cell
    without one is rendered as an empty header or content cell.
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'tablecell', parent)
        self.cellHasChildElement = False

    def start_element(self, parser, name, attrs):
        self.cellHasChildElement = True
        if name == 'bookmark':
            self.parse_child(Bookmark(attrs, self, 'div', parser))
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        elif name == 'embed' or name == 'embedvar':
            (fname, ref_id) = href_to_fname_id(attrs['href'])
            if parser.follow_embed:
                self.embed_href(parser, fname, ref_id)
        elif name == 'paragraph':
            parser.parse_localized_paragraph(TableContentParagraph(attrs, self), attrs, self)
        elif name == 'section':
            self.parse_child(Section(attrs, self))
        elif name == 'bascode':
            # ignored, do not syntax highlight in table cells
            pass
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        text = ''
        if not self.cellHasChildElement: # an empty element
            # The flag lives on the TableRow and is set by the paragraphs
            # of the row; if an empty cell comes before any paragraph has
            # set it, treat the cell as a content cell instead of failing.
            if getattr(self.parent, 'isTableHeader', False):
                role = 'tablehead'
            else:
                role = 'tablecontent'
            text = replace_paragraph_role['start'][role] + \
                    replace_paragraph_role['end'][role]
        return text + ElementBase.get_all(self)
-
class TableRow(ElementBase):
    """One row of a table; only <tablecell> children are handled.

    isTableHeader is overwritten by the paragraphs in the cells of the row
    and read by empty cells.  It starts out False, so that an empty cell
    rendered before any paragraph has set it does not fail on a missing
    attribute.
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'tablerow', parent)
        self.isTableHeader = False

    def start_element(self, parser, name, attrs):
        if name == 'tablecell':
            self.parse_child(TableCell(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # '|-' starts a new row in the wiki table markup
        return '|-\n' + ElementBase.get_all(self)
-
class BasicCode(ElementBase):
    """A <bascode> block; rendered inside <source lang="oobas">."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'bascode', parent)

    def start_element(self, parser, name, attrs):
        if name != 'paragraph':
            self.unhandled_element(parser, name)
            return
        parser.parse_localized_paragraph(
            BasicCodeParagraph(attrs, self), attrs, self)

    def get_all(self):
        code = ElementBase.get_all(self)
        return '<source lang="oobas">\n' + code + '</source>\n\n'
-
class Table(ElementBase):
    """A table; rendered as a wiki table of class "wikitable"."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'table', parent)

    def start_element(self, parser, name, attrs):
        if name == 'tablerow':
            self.parse_child(TableRow(attrs, self))
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # + ' align="left"' etc.?
        rows = ElementBase.get_all(self)
        return '{| class="wikitable"\n' + rows + '|}\n\n'
-
class ListItem(ElementBase):
    """One item of a list; its markup depends on the parent list."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'listitem', parent)

    def start_element(self, parser, name, attrs):
        if name == 'paragraph':
            parser.parse_localized_paragraph(
                ListItemParagraph(attrs, self), attrs, self)
        elif name == 'bookmark':
            self.parse_child(Bookmark(attrs, self, 'span', parser))
        elif name in ('embed', 'embedvar'):
            (fname, ref_id) = href_to_fname_id(attrs['href'])
            if parser.follow_embed:
                self.embed_href(parser, fname, ref_id)
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        if self.parent.startwith > 0:
            # lists with an explicit start are written as HTML
            prefix, postfix = '<li>', '</li>'
        elif self.parent.type == 'ordered':
            prefix, postfix = '#', '\n'
        else:
            prefix, postfix = '*', '\n'

        # add the text itself, the parts separated by line breaks
        parts = [child.get_all() for child in self.objects]
        return prefix + '<br/>'.join(parts) + postfix
-
class List(ElementBase):
    """A list; only <listitem> children are handled.

    type      - the 'type' attribute; items check it against 'ordered'
    startwith - the 'startwith' attribute as an int, 0 when it is missing
                or not a number; a value above 0 switches the output to
                an HTML <ol start="...">
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'list', parent)

        self.type = attrs['type']
        try:
            self.startwith = int(attrs['startwith'])
        except (KeyError, ValueError):
            self.startwith = 0

    def start_element(self, parser, name, attrs):
        if name == 'listitem':
            self.parse_child(ListItem(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        items = ElementBase.get_all(self)
        if self.startwith > 0:
            return '<ol start="%d">\n'% self.startwith + items + '\n</ol>\n'
        return items + '\n'
-
-# To handle elements that should be completely ignored
class Ignore(ElementBase):
    """Element that should be completely ignored.

    It only takes over parsing until the end tag of 'element_name'.
    """

    def __init__(self, attrs, parent, element_name):
        ElementBase.__init__(self, name=element_name, parent=parent)
-
class OrigTitle(TextElementBase):
    """The <title> when no language is set; rendered as {{OrigLang|...}}."""

    def __init__(self, attrs, parent):
        TextElementBase.__init__(
            self, attrs, parent, 'title', '{{OrigLang|', '}}\n', True)
-
class Title(TextElementBase):
    """The <title> of a localized page; rendered as {{Lang|...}}.

    A non-empty 'localized_title' replaces the title text from the file.
    """

    def __init__(self, attrs, parent, localized_title):
        TextElementBase.__init__(
            self, attrs, parent, 'title', '{{Lang|', '}}\n', True)
        self.localized_title = localized_title

    def get_all(self):
        if self.localized_title != '':
            self.text = self.localized_title
        return TextElementBase.get_all(self)
-
class Topic(ElementBase):
    """The <topic> element; it carries the title of the page."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'topic', parent)

    def start_element(self, parser, name, attrs):
        if name == 'filename':
            self.parse_child(Ignore(attrs, self, name))
        elif name != 'title':
            self.unhandled_element(parser, name)
        elif parser.lang == '':
            self.parse_child(OrigTitle(attrs, self))
        else:
            localized = get_localized_text(parser.filename, 'tit')
            self.parse_child(Title(attrs, self, localized))
-
class Meta(ElementBase):
    """The <meta> element; only the <topic> in it is of interest."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'meta', parent)

    def start_element(self, parser, name, attrs):
        if name in ('history', 'lastedited'):
            self.parse_child(Ignore(attrs, self, name))
        elif name == 'topic':
            self.parse_child(Topic(attrs, self))
        else:
            self.unhandled_element(parser, name)
-
class Section(ElementBase):
    """A <section>; it can be nested, and embedded elsewhere by its id."""

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'section', parent)
        self.id = attrs['id']

    def start_element(self, parser, name, attrs):
        if name == 'bookmark':
            self.parse_child(Bookmark(attrs, self, 'div', parser))
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        elif name == 'embed' or name == 'embedvar':
            (fname, ref_id) = href_to_fname_id(attrs['href'])
            if parser.follow_embed:
                self.embed_href(parser, fname, ref_id)
        elif name == 'list':
            self.parse_child(List(attrs, self))
        elif name == 'paragraph':
            parser.parse_paragraph(attrs, self)
        elif name == 'section':
            # sections can be nested
            self.parse_child(Section(attrs, self))
        elif name == 'switch':
            self.parse_child(Switch(attrs, self, parser.embedding_app))
        elif name == 'table':
            self.parse_child(Table(attrs, self))
        elif name == 'bascode':
            self.parse_child(BasicCode(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # some of the section ids are used as real id's, some of them have
        # function (like relatedtopics), and have to be templatized
        mapping = section_id_mapping.get(self.id, '')
        if mapping != '':
            return '{{%s|%s}}\n\n'% (mapping, \
                    escape_equals_sign(ElementBase.get_all(self)))
        return ElementBase.get_all(self)

    def get_variable(self, id):
        """Look for 'id' in the children first, then match this section."""
        var = ElementBase.get_variable(self, id)
        if var is not None:
            return var
        if id == self.id:
            return self
        return None
-
class Sort(ElementBase):
    """A <sort> element; renders its sections ordered by their id.

    order - the 'order' attribute, 'asc' when it is missing
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'sort', parent)

        try:
            self.order = attrs['order']
        except KeyError:
            self.order = 'asc'

    def start_element(self, parser, name, attrs):
        if name == 'section':
            self.parse_child(Section(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        # Ascending is sorted()'s natural order.  The flag used to be
        # inverted: 'asc' asked for reverse=True and produced a descending
        # list.
        # NOTE(review): 'desc' is assumed to be the only other value of
        # the attribute -- confirm against the XHP DTD.
        descending = (self.order == 'desc')
        self.objects = sorted(self.objects, key=lambda obj: obj.id,
                              reverse=descending)

        return ElementBase.get_all(self)
-
class Link(ElementBase):
    """A <link>; rendered as an external or an internal wiki link.

    link         - the 'href' attribute
    default_name - the 'name' attribute, or the part of the href after the
                   last '/' when there is none; used as the link text when
                   the element has no text of its own
    lname        - the wiki page name found by get_link_filename()
    fragment     - the '#...' part found by get_link_filename()
    """

    def __init__(self, attrs, parent, lang):
        ElementBase.__init__(self, 'link', parent)

        self.link = attrs['href']
        try:
            self.lname = attrs['name']
        except KeyError:
            self.lname = self.link[self.link.rfind("/")+1:]
        # Override lname
        self.default_name = self.lname
        (self.lname, self.fragment) = get_link_filename(self.link, self.lname)
        self.wikitext = ""
        self.lang = lang

    def char_data(self, parser, data):
        self.wikitext = self.wikitext + data

    def get_all(self):
        if self.wikitext == "":
            self.wikitext = self.default_name

        self.wikitext = replace_text(self.wikitext)
        if self.link.startswith("http"):
            # external link
            return '[%s %s]'% (self.link, self.wikitext)
        if self.lang != '':
            # localized pages live under <page>/<lang>
            return '[[%s/%s%s|%s]]'% (self.lname, self.lang, self.fragment, self.wikitext)
        return '[[%s%s|%s]]'% (self.lname, self.fragment, self.wikitext)
-
class SwitchInline(ElementBase):
    """A <switchinline>; picks or templatizes one of its cases.

    switch        - the 'select' attribute; 'sys' and 'appl' are handled
    embedding_app - the application we are being embedded into, '' when
                    there is none
    """

    # 'select' values of the "appl" cases -> suffix of the WhenIn template;
    # '' marks the cases that are treated as the default text
    appl_names = {'BASIC':'Basic', 'CALC':'Calc', \
                  'CHART':'Chart', 'DRAW':'Draw', \
                  'IMAGE':'Draw', 'IMPRESS': 'Impress', \
                  'MATH':'Math', 'WRITER':'Writer', \
                  'OFFICE':'', 'default':''}

    def __init__(self, attrs, parent, app):
        ElementBase.__init__(self, 'switchinline', parent)
        self.switch = attrs['select']
        self.embedding_app = app

    def start_element(self, parser, name, attrs):
        if name == 'caseinline':
            self.parse_child(CaseInline(attrs, self, False))
        elif name == 'defaultinline':
            self.parse_child(CaseInline(attrs, self, True))
        else:
            self.unhandled_element(parser, name)

    def get_all(self):
        if len(self.objects) == 0:
            return ''
        if self.switch == 'sys':
            return self._get_all_sys()
        if self.switch == 'appl':
            if self.embedding_app == '':
                return self._get_all_appl()
            # we want directly use the right text, when inlining something
            # 'shared' into an 'app'
            for i in self.objects:
                if i.case == self.embedding_app:
                    return i.get_all()

        return ''

    # render the "sys" cases as one {{System|...}} template
    def _get_all_sys(self):
        system = {'MAC':'', 'UNIX':'', 'WIN':'', 'default':''}
        for i in self.objects:
            if i.case in system:
                system[i.case] = i.get_all()
            elif i.case == 'OS2':
                # ignore, there is only one mention of OS2, which is a
                # 'note to translators', and no meat
                pass
            elif i.case == 'HIDE_HERE':
                # do what the name suggest ;-)
                pass
            else:
                sys.stderr.write('Unhandled "%s" case in "sys" switchinline.\n'% \
                        i.case )
        text = '{{System'
        for (case, param) in [['default', 'default'], ['MAC', 'mac'], \
                ['UNIX', 'unx'], ['WIN', 'win']]:
            if system[case] != '':
                text = '%s|%s=%s'% (text, param, system[case])
        return text + '}}'

    # render the "appl" cases as {{WhenIn<App>|...}} templates
    def _get_all_appl(self):
        text = ''
        default = ''
        for i in self.objects:
            # only the lookup is guarded: a failure while rendering the
            # case itself must not be reported as an unknown case
            try:
                app = self.appl_names[i.case]
            except KeyError:
                sys.stderr.write('Unhandled "%s" case in "appl" switchinline.\n'% \
                        i.case)
                continue

            content = i.get_all()
            if content == '':
                pass
            elif app == '':
                default = content
            else:
                text = text + '{{WhenIn%s|%s}}'% (app, escape_equals_sign(content))

        if text == '':
            text = default
        elif default != '':
            text = text + '{{WhenDefault|%s}}'% escape_equals_sign(default)

        return text
-
class Case(ElementBase):
    """A <case> or <default> branch of a <switch>.

    case - the 'select' attribute, or 'default' for the default branch
    """

    def __init__(self, attrs, parent, is_default):
        ElementBase.__init__(self, 'case', parent)

        if not is_default:
            self.case = attrs['select']
        else:
            # the default branch is closed by </default>
            self.name = 'default'
            self.case = 'default'

    def start_element(self, parser, name, attrs):
        if name in ('embed', 'embedvar'):
            if parser.follow_embed:
                (fname, ref_id) = href_to_fname_id(attrs['href'])
                self.embed_href(parser, fname, ref_id)
            return

        if name == 'paragraph':
            parser.parse_paragraph(attrs, self)
            return

        if name == 'bookmark':
            child = Bookmark(attrs, self, 'div', parser)
        elif name == 'comment':
            child = Comment(attrs, self)
        elif name == 'list':
            child = List(attrs, self)
        elif name == 'section':
            child = Section(attrs, self)
        elif name == 'table':
            child = Table(attrs, self)
        else:
            self.unhandled_element(parser, name)
            return

        self.parse_child(child)
-
class Switch(SwitchInline):
    """A <switch>; like <switchinline>, with <case>/<default> branches."""

    def __init__(self, attrs, parent, app):
        SwitchInline.__init__(self, attrs, parent, app)
        self.name = 'switch'

    def start_element(self, parser, name, attrs):
        # follow the application the parser is embedding into
        self.embedding_app = parser.embedding_app
        if name not in ('case', 'default'):
            self.unhandled_element(parser, name)
            return
        is_default = (name == 'default')
        self.parse_child(Case(attrs, self, is_default))
-
class Item(ElementBase):
    """An <item>; its text is wrapped according to the 'type' attribute.

    An item without a type is handled as 'unknown'.
    """

    # markup per item type; with 'templ' set, the text ends up inside a
    # template and its '=' signs get escaped
    replace_type = \
            {'start':{'input': '<code>',
                      'keycode': '{{KeyCode|',
                      'tasto': '{{KeyCode|',
                      'litera': '<code>',
                      'literal': '<code>',
                      'menuitem': '{{MenuItem|',
                      'mwnuitem': '{{MenuItem|',
                      'OpenOffice.org': '',
                      'productname': '',
                      'unknown': '<code>'
                     },
             'end':{'input': '</code>',
                    'keycode': '}}',
                    'tasto': '}}',
                    'litera': '</code>',
                    'literal': '</code>',
                    'menuitem': '}}',
                    'mwnuitem': '}}',
                    'OpenOffice.org': '',
                    'productname': '',
                    'unknown': '</code>'
                   },
             'templ':{'input': False,
                      'keycode': True,
                      'tasto': True,
                      'litera': False,
                      'literal': False,
                      'menuitem': True,
                      'mwnuitem': True,
                      'OpenOffice.org': False,
                      'productname': False,
                      'unknown': False
                     }}

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'item', parent)

        try:
            self.type = attrs['type']
        except KeyError:
            self.type = 'unknown'
        self.text = ''

    def char_data(self, parser, data):
        self.text = self.text + data

    def get_all(self):
        text = replace_text(self.text)

        if self.type not in self.replace_type['templ']:
            # an item type we know nothing about: emit the bare text
            sys.stderr.write('Unhandled item type "%s".\n'% self.type)
            return text

        if self.replace_type['templ'][self.type]:
            text = escape_equals_sign(text)
        return self.replace_type['start'][self.type] + \
                text + \
                self.replace_type['end'][self.type]
-
-
class Paragraph(ElementBase):
    """A <paragraph>, and the base of the other inline containers.

    role     - the 'role' attribute, 'paragraph' when it is missing; it
               selects the markup from replace_paragraph_role
    id       - the 'id' attribute, "" when it is missing
    level    - the 'level' attribute as an int, 0 when it is missing or
               not a number
    is_first - whether the parent had no objects yet when this paragraph
               was created
    """

    def __init__(self, attrs, parent):
        ElementBase.__init__(self, 'paragraph', parent)

        try:
            self.role = attrs['role']
        except KeyError:
            self.role = 'paragraph'

        try:
            self.id = attrs['id']
        except KeyError:
            self.id = ""

        try:
            self.level = int(attrs['level'])
        except (KeyError, ValueError):
            self.level = 0

        self.is_first = (len(self.parent.objects) == 0)

    def start_element(self, parser, name, attrs):
        if name == 'ahelp':
            # only a hidden <ahelp> is dropped; the content of a visible
            # one is handled as a part of this paragraph
            try:
                hidden = (attrs['visibility'] == 'hidden')
            except KeyError:
                hidden = False
            if hidden:
                self.parse_child(Ignore(attrs, self, name))
        elif name == 'br':
            self.parse_child(Br(attrs, self))
        elif name == 'comment':
            self.parse_child(Comment(attrs, self))
        elif name == 'emph':
            self.parse_child(Emph(attrs, self))
        elif name == 'sup':
            self.parse_child(Sup(attrs, self))
        elif name == 'embedvar':
            if parser.follow_embed:
                (fname, ref_id) = href_to_fname_id(attrs['href'])
                self.embed_href(parser, fname, ref_id)
        elif name == 'help-id-missing':
            self.parse_child(HelpIdMissing(attrs, self))
        elif name == 'image':
            self.parse_child(Image(attrs, self))
        elif name == 'item':
            self.parse_child(Item(attrs, self))
        elif name == 'link':
            self.parse_child(Link(attrs, self, parser.lang))
        elif name == 'localized':
            # we ignore this tag, it is added arbitrary for the paragraphs
            # that come from .sdf files
            pass
        elif name == 'switchinline':
            self.parse_child(SwitchInline(attrs, self, parser.embedding_app))
        elif name == 'variable':
            self.parse_child(Variable(attrs, self))
        else:
            self.unhandled_element(parser, name)

    def char_data(self, parser, data):
        if self.role in ('paragraph', 'heading', 'listitem', 'variable'):
            # squeeze the leading whitespace to one space, and join the
            # lines of flowing text
            if data != '' and data[0] == ' ':
                data = ' ' + data.lstrip()
            data = data.replace('\n', ' ')

        if len(data):
            self.objects.append(Text(data))

    def get_all(self):
        role = self.role
        if role == 'heading':
            if self.level <= 0:
                sys.stderr.write('Heading, but the level is %d.\n'% self.level)
            elif self.level < 6:
                role = 'heading%d'% self.level
            else:
                role = 'heading6'

        # if we are not the first para in the table, we need special handling
        if not self.is_first and role.find('table') == 0:
            if role == 'tablecontentcode':
                role = 'tablenextparacode'
            else:
                role = 'tablenextpara'

        # the text itself
        children = ElementBase.get_all(self)
        if self.role != 'emph' and self.role != 'bascode':
            children = children.strip()

        if len(children) == 0:
            return ''

        # prepend the markup according to the role
        text = ''
        try:
            text = text + replace_paragraph_role['start'][role]
        except KeyError:
            sys.stderr.write( "Unknown paragraph role start: " + role + "\n" )

        # An unknown role (e.g. a 'heading' without a usable level) has no
        # 'templ' entry either; it used to raise KeyError here, right
        # after the warning above.  Emit its text unescaped instead.
        if replace_paragraph_role['templ'].get(role, False):
            text = text + escape_equals_sign(children)
        else:
            text = text + children

        # append the markup according to the role
        try:
            text = text + replace_paragraph_role['end'][role]
        except KeyError:
            sys.stderr.write( "Unknown paragraph role end: " + role + "\n" )

        return text
-
class Variable(Paragraph):
    """A <variable>; a paragraph that can be looked up by its id."""

    def __init__(self, attrs, parent):
        Paragraph.__init__(self, attrs, parent)
        self.name = self.role = 'variable'
        # unlike for a plain paragraph, the id is mandatory here
        self.id = attrs['id']

    def get_variable(self, id):
        if id != self.id:
            return None
        return self
-
class CaseInline(Paragraph):
    """A <caseinline> or <defaultinline> branch of a <switchinline>.

    case - the 'select' attribute, or 'default' for the default branch
    """

    def __init__(self, attrs, parent, is_default):
        Paragraph.__init__(self, attrs, parent)

        self.role = 'null'
        if not is_default:
            self.name = 'caseinline'
            self.case = attrs['select']
        else:
            self.name = 'defaultinline'
            self.case = 'default'
-
class Emph(Paragraph):
    """An <emph>; non-empty content is wrapped in three apostrophes."""

    def __init__(self, attrs, parent):
        Paragraph.__init__(self, attrs, parent)
        self.name = self.role = 'emph'

    def get_all(self):
        inner = Paragraph.get_all(self)
        if not len(inner):
            # an empty <emph/> leaves no trace
            return ''
        return "'''" + inner + "'''"
-
class Sup(Paragraph):
    """A <sup>; non-empty content is wrapped in <sup>...</sup>."""

    def __init__(self, attrs, parent):
        Paragraph.__init__(self, attrs, parent)
        self.name = self.role = 'sup'

    def get_all(self):
        inner = Paragraph.get_all(self)
        if not len(inner):
            return ''
        return "<sup>" + inner + "</sup>"
-
class ListItemParagraph(Paragraph):
    """A paragraph inside a list item; its role is always 'listitem'."""

    def __init__(self, attrs, parent):
        Paragraph.__init__(self, attrs, parent)
        # whatever role the file asked for is overridden
        self.role = 'listitem'
-
class BasicCodeParagraph(Paragraph):
    """A paragraph inside <bascode>; its role is always 'bascode'."""

    def __init__(self, attrs, parent):
        Paragraph.__init__(self, attrs, parent)
        # whatever role the file asked for is overridden
        self.role = 'bascode'
-
class TableContentParagraph(Paragraph):
    """A paragraph inside a table cell.

    Any role other than 'tablehead' and 'tablecontent' is mapped to a
    table role, and the row is told whether it holds header cells.
    """

    def __init__(self, attrs, parent):
        Paragraph.__init__(self, attrs, parent)
        if self.role not in ('tablehead', 'tablecontent'):
            if self.role in ('code', 'bascode'):
                self.role = 'tablecontentcode'
            else:
                self.role = 'tablecontent'

        # self.parent.parent is TableRow Element
        row = self.parent.parent
        row.isTableHeader = (self.role == 'tablehead')
-
class ParserBase:
    """Feeds an XML buffer through expat into a tree of element objects.

    The whole buffer is parsed in the constructor.  Every expat event is
    forwarded to the object that 'head_object' reports as the one being
    parsed at that moment (get_curobj()).
    """

    def __init__(self, filename, follow_embed, embedding_app, current_app, wiki_page_name, lang, head_object, buffer):
        self.filename = filename
        self.follow_embed = follow_embed
        self.embedding_app = embedding_app
        self.current_app = current_app
        self.wiki_page_name = wiki_page_name
        self.lang = lang
        self.head_obj = head_object

        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.char_data

        p.Parse(buffer)

    def start_element(self, name, attrs):
        self.head_obj.get_curobj().start_element(self, name, attrs)

    def end_element(self, name):
        self.head_obj.get_curobj().end_element(self, name)

    def char_data(self, data):
        self.head_obj.get_curobj().char_data(self, data)

    def get_all(self):
        """Return the wiki text of the whole parsed structure."""
        return self.head_obj.get_all()

    def get_variable(self, id):
        """Return the object that answers to 'id', or None."""
        return self.head_obj.get_variable(id)

    def parse_localized_paragraph(self, paragraph, attrs, obj):
        """Add 'paragraph' to 'obj', filled from the localization if any.

        When there is a localized text for the id of the paragraph, that
        text is parsed into the paragraph and the original content of the
        element is skipped; otherwise the paragraph is parsed as it is.
        """
        try:
            localized_text = get_localized_text(self.filename, attrs['id'])
        except KeyError:
            # a paragraph without an id cannot be localized
            localized_text = ''

        if localized_text != '':
            # parse the localized text
            text = u'<?xml version="1.0" encoding="UTF-8"?><localized>' + localized_text + '</localized>'
            ParserBase(self.filename, self.follow_embed, self.embedding_app, \
                    self.current_app, self.wiki_page_name, self.lang, \
                    paragraph, text.encode('utf-8'))
            # add it to the overall structure
            obj.objects.append(paragraph)
            # and ignore the original text
            obj.parse_child(Ignore(attrs, obj, 'paragraph'))
        else:
            obj.parse_child(paragraph)

    def parse_paragraph(self, attrs, obj):
        """Parse a paragraph into 'obj', maybe skipping a leading heading.

        A role="heading" level="1" paragraph is ignored once, while the
        ignore_heading flag is set and embeds are being followed.
        """
        try:
            is_top_heading = attrs['role'] == 'heading' and \
                    int(attrs['level']) == 1
        except (KeyError, ValueError):
            is_top_heading = False

        # ignore_heading is not set in ParserBase.__init__ (XhpParser sets
        # it), so a plain ParserBase counts as "do not ignore"
        if is_top_heading and getattr(self, 'ignore_heading', False) \
                and self.follow_embed:
            self.ignore_heading = False
            obj.parse_child(Ignore(attrs, obj, 'paragraph'))
        else:
            self.parse_localized_paragraph(Paragraph(attrs, obj), attrs, obj)
-
-class XhpParser(ParserBase):
-    """Parser that reads one source file from disk and parses it at once.
-
-    The application the file belongs to is derived from the directory names
-    found in ``filename``.
-    """
-
-    def __init__(self, filename, follow_embed, embedding_app, wiki_page_name, lang):
-        # we want to ignore the 1st level="1" heading, because in most of the
-        # cases, it is the only level="1" heading in the file, and it is the
-        # same as the page title
-        self.ignore_heading = True
-
-        self.current_app_raw = ''
-        current_app = ''
-        known_apps = (('sbasic', 'BASIC'), ('scalc', 'CALC'),
-                      ('sdatabase', 'DATABASE'), ('sdraw', 'DRAW'),
-                      ('schart', 'CHART'), ('simpress', 'IMPRESS'),
-                      ('smath', 'MATH'), ('swriter', 'WRITER'))
-        # the first module directory that occurs in the path wins
-        for module_dir, app_name in known_apps:
-            if ('/%s/'% module_dir) in filename:
-                self.current_app_raw = module_dir
-                current_app = app_name
-                break
-
-        # a file that is not embedded anywhere "embeds" itself
-        if embedding_app == '':
-            embedding_app = current_app
-
-        source = codecs.open(filename, "r", "utf-8")
-        contents = source.read()
-        source.close()
-
-        ParserBase.__init__(self, filename, follow_embed, embedding_app,
-                            current_app, wiki_page_name, lang, XhpFile(),
-                            contents.encode('utf-8'))
-
-def loadallfiles(filename):
-    """Reload the global ``titles`` list from a ';'-separated UTF-8 file.
-
-    Every line is split on ';' into at most three fields and appended to
-    ``titles`` as a list.  The line ending is left on the last field, so
-    callers have to strip the fields themselves.
-    """
-    global titles
-    titles = []
-    # the context manager closes the file even if decoding fails part-way
-    with codecs.open(filename, "r", "utf-8") as listing:
-        for line in listing:
-            titles.append(line.split(";", 2))
-
-class WikiConverter(Thread):
-    """Worker thread that converts one input file into one wiki page file."""
-
-    def __init__(self, inputfile, wiki_page_name, lang, outputfile):
-        """Remember the conversion parameters; nothing is read or written yet."""
-        Thread.__init__(self)
-        self.inputfile = inputfile
-        self.wiki_page_name = wiki_page_name
-        self.lang = lang
-        self.outputfile = outputfile
-
-    def run(self):
-        """Parse the input file and write the rendered page as UTF-8."""
-        parser = XhpParser(self.inputfile, True, '', self.wiki_page_name, self.lang)
-        # Render before creating the output file: if rendering fails, no
-        # empty file is left behind that convert() would later mistake for
-        # an already converted page.
-        page = parser.get_all()
-        with codecs.open(self.outputfile, "wb", "utf-8") as out:
-            out.write(page)
-
-def write_link(r, target):
-    """Write the redirect page ``wiki/<r>`` pointing at ``target``.
-
-    Failures are reported on stderr and otherwise ignored, so one page that
-    cannot be written does not stop the remaining redirects.
-    """
-    fname = 'wiki/%s'% r
-    try:
-        # the context manager closes the file even if the write fails
-        with open(fname, "w") as out:
-            out.write('#REDIRECT [[%s]]\n'% target)
-    except Exception:
-        # fname already carries the 'wiki/' prefix; do not prepend it again
-        sys.stderr.write('Unable to write "%s".\n'% fname)
-
-def write_redirects():
-    """Write a redirect page for every entry collected in ``redirects``.
-
-    Each entry is indexed as (app, redirect, target, authoritative).
-    """
-    print 'Generating the redirects...'
-    # "app/redirect" -> True once an authoritative link has been written
-    written = {}
-
-    # in the first pass, immediately write the links that are embedded, so
-    # that we can always point to the source versions
-    for entry in redirects:
-        app, redirect = entry[0], entry[1]
-        target = entry[2]
-        authoritative = entry[3]
-
-        if app == '':
-            continue
-
-        key = '%s/%s'% (app, redirect)
-        if authoritative:
-            write_link(key, target)
-            written[key] = True
-        else:
-            # keep an earlier True, otherwise remember the key as unwritten
-            written.setdefault(key, False)
-
-    # in the second pass, output the wiki links
-    all_apps = ('swriter', 'scalc', 'simpress', 'sdraw', 'smath',
-                'schart', 'sbasic', 'sdatabase')
-    for entry in redirects:
-        app, redirect, target = entry[0], entry[1], entry[2]
-
-        if app != '':
-            key = '%s/%s'% (app, redirect)
-            if not written[key]:
-                write_link(key, target)
-            continue
-
-        # no application given: the redirect is created for all of them
-        for module in all_apps:
-            write_link('%s/%s'% (module, redirect), target)
-
-# Main Function
-def convert(generate_redirects, lang, sdf_file):
-    """Convert every file listed in "alltitles.csv" into a page under wiki/.
-
-    generate_redirects -- when true, write_redirects() is called at the end
-                          of a main (lang == '') run
-    lang               -- language code; '' produces the main pages
-    sdf_file           -- localization file, only loaded when lang != ''
-
-    Pages whose output file can already be opened are skipped with a
-    warning.  Conversions run in WikiConverter threads; the loop is
-    throttled by ``max_threads``.  Returns early, without converting
-    anything, when the localization data cannot be loaded.
-    """
-    if lang == '':
-        print 'Generating the main wiki pages...'
-    else:
-        print 'Generating the wiki pages for language %s...'% lang
-
-    global redirects
-    redirects = []
-    global images
-    images = set()
-
-    loadallfiles("alltitles.csv")
-
-    if lang != '':
-        sys.stderr.write('Using localizations from "%s"\n'% sdf_file)
-        if not load_localization_data(sdf_file):
-            return
-
-    for title in titles:
-        # do not start more workers than allowed
-        while threading.active_count() > max_threads:
-            time.sleep(0.001)
-
-        infile = title[0].strip()
-        wikiname = title[1].strip()
-        articledir = 'wiki/' + wikiname
-        try:
-            os.mkdir(articledir)
-        except Exception:
-            # best effort, e.g. the directory is already there
-            pass
-
-        if lang != '':
-            wikiname = '%s/%s'% (wikiname, lang)
-            outfile = '%s/%s'% (articledir, lang)
-        else:
-            outfile = '%s/MAIN'% articledir
-
-        # An output file that can be opened counts as already converted.
-        # The probe handle is closed right away instead of being kept
-        # around until the end of the iteration.
-        try:
-            open(outfile, 'r').close()
-            already_converted = True
-        except Exception:
-            already_converted = False
-
-        if not already_converted:
-            try:
-                WikiConverter(infile, wikiname, lang, outfile).start()
-                continue
-            except Exception:
-                print 'Failed to convert "%s" into "%s".\n'% \
-                        (infile, outfile)
-        # Reached for existing pages and for workers that could not be
-        # started.  There is no file handle to close here any more; the old
-        # unconditional close() hit an unbound or stale handle on failure.
-        sys.stderr.write('Warning: Skipping: %s > %s\n'% (infile, outfile))
-
-    # wait for everyone to finish
-    while threading.active_count() > 1:
-        time.sleep(0.001)
-
-    if lang == '':
-        # set of the images used here
-        print 'Generating "images.txt", the list of used images...'
-        with open('images.txt', "w") as listing:
-            for image in images:
-                listing.write('%s\n'% image)
-
-        # generate the redirects
-        if generate_redirects:
-            write_redirects()
-
-# vim:set shiftwidth=4 softtabstop=4 expandtab: