summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Vander Stichele <thomas@apestaart.org>2002-03-03 16:25:11 +0000
committerThomas Vander Stichele <thomas@apestaart.org>2002-03-03 16:25:11 +0000
commitcac482f223efe6b12c3726b6a615c76b92ce9fba (patch)
treef8d1d5eae1bc1eb6a50d82eea2aa183a33c96e51
parent63410485440da62071c972fd2be4643bb6c40a95 (diff)
thanks to jhdalin for this
Original commit message from CVS: thanks to jhdalin for this
-rw-r--r--docs/devhelp/html2xml.py269
1 files changed, 269 insertions, 0 deletions
diff --git a/docs/devhelp/html2xml.py b/docs/devhelp/html2xml.py
new file mode 100644
index 0000000000..aa425a1fc2
--- /dev/null
+++ b/docs/devhelp/html2xml.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python
+import formatter
+import htmllib
+import os
+import os.path
+import sgmllib
+import string
+import sys
+
+base_dict = {
+ 'glib' : 'http://developer.gnome.org/doc/API/glib/',
+ 'gdk' : 'http://developer.gnome.org/doc/API/gdk/',
+ 'gtk' : 'http://developer.gnome.org/doc/API/gtk/',
+ 'gdk-pixbuf' : 'http://developer.gnome.org/doc/API/gdk-pixbuf/',
+ 'glib-2.0' : 'http://developer.gnome.org/doc/API/2.0/glib/',
+ 'gobject-2.0' : 'http://developer.gnome.org/doc/API/2.0/gobject/',
+ 'atk' : 'http://developer.gnome.org/doc/API/2.0/atk/',
+ 'pango' : 'http://developer.gnome.org/doc/API/2.0/pango/',
+ 'gdk-2.0' : 'http://developer.gnome.org/doc/API/2.0/gdk/',
+ 'gtk-2.0' : 'http://developer.gnome.org/doc/API/2.0/gtk/',
+ 'gdk-pixbuf-2.0' : 'http://developer.gnome.org/doc/API/2.0/gdk-pixbuf/',
+ 'gnome-vfs-2.0' : 'http://developer.gnome.org/doc/API/2.0/gnome-vfs/',
+ 'libxml-2' : 'http://xmlsoft.org/html/',
+ 'libgnome' : 'http://developer.gnome.org/doc/API/2.0/libgnome/',
+ 'libgnomeui' : 'http://developer.gnome.org/doc/API/2.0/libgnomeui/',
+}
+
+def does_dict_have_keys (dict, keys):
+ for key in keys:
+ if not dict.has_key (key):
+ return 0
+ if len(dict) != len(keys):
+ return 0
+ return 1
+
+class BookParser (sgmllib.SGMLParser):
+ def __init__ (self):
+ sgmllib.SGMLParser.__init__ (self)
+ self.a = self.parents = []
+ self.dict = {}
+ self.last = self.link = ""
+ self.is_a = self.level = 0
+ self.first = 1
+
+ def unknown_starttag (self, tag, attrs):
+ if tag == 'a':
+ self.is_a = 1
+ for attr in attrs:
+ if attr[0] == "href":
+ self.link = attr[1]
+ break
+
+ if tag in ['dd', 'ul']:
+ self.parents.append (self.last)
+ self.level = self.level + 1
+
+ def unknown_endtag (self, tag):
+ if tag == 'a':
+ self.is_a = 0
+
+ if tag in ['dd', 'ul']:
+ self.level = self.level - 1
+ self.parents.pop()
+
+ def handle_data (self, data):
+ data = string.strip (data)
+ if not data or data in [ ">", "<" ]:
+ return
+
+ if self.first:
+ self.dict['name'] = data
+ self.first = 0
+ return
+
+ if data == self.dict['name'] or data in [ "Next Page", "Previous Page", "Home", "Next"]:
+ return
+
+ if len (self.parents) == 0:
+ dict = self.dict
+ elif len (self.parents) == 1:
+ dict = self.dict[self.parents[0]]
+ elif len (self.parents) == 2:
+ dict = self.dict[self.parents[0]][self.parents[1]]
+ elif len (self.parents) == 3:
+ dict = self.dict[self.parents[0]][self.parents[1]][self.parents[2]]
+ else:
+ dict = None
+
+ if self.is_a:
+ if dict == None:
+ return
+
+ if not dict.has_key (data):
+ dict[data] = {}
+ if not dict.has_key ('order'):
+ dict['order'] = []
+ dict['order'].append (data)
+ dict[data]['link'] = self.link
+
+ self.last = data
+
+class FunctionParser (htmllib.HTMLParser):
+ SKIP_DATA = [ "Next Page", "Previous Page", "Home", "Next", "Up", "<", ">", "Figure 1", "[1]"]
+ is_a = 0
+ a = []
+ link = ""
+ def anchor_bgn (self, href, name, type):
+ self.is_a = 1
+ self.link = href
+
+ def anchor_end (self):
+ self.is_a = 0
+
+ def handle_data (self, data):
+ data = string.strip (data)
+ if data in self.SKIP_DATA:
+ return
+
+ if not '#' in self.link:
+ return
+
+ if self.link[:2] == "..":
+ return
+
+ if self.is_a and self.link:
+ self.a.append ((data, self.link))
+
+def parse_file_for_functions (fd):
+ try:
+ p = FunctionParser (formatter.NullFormatter ())
+ p.feed (fd.read ())
+ p.close ()
+ except KeyboardInterrupt:
+ raise SystemExit
+ return p.a
+
+def output_chapter (fd, main_dict, level=1):
+ if not main_dict.has_key ('order'):
+ return
+
+ for sub in main_dict['order']:
+ dict = main_dict[sub]
+
+ if not does_dict_have_keys (dict, ['link']):
+ fd.write (' '*(level*4))
+ fd.write ('<sub name="%s" link="%s">\n' % (sub, dict['link']))
+ output_chapter (fd, dict, level+1)
+ fd.write (' '*(level*4))
+ fd.write ('</sub>\n')
+ else:
+ fd.write (' '*(level*4))
+ fd.write ('<sub name="%s" link="%s"/>\n' % (sub, dict['link']))
+
+def get_functions (dir):
+ function_list = []
+
+ for file in os.listdir (dirname):
+ if file[-5:] != ".html":
+ continue
+
+ fd = open (dirname + os.sep + file)
+ functions = parse_file_for_functions (fd)
+
+ for function in functions:
+ if not function in function_list:
+ function_list.append (function)
+ return function_list
+
+def get_index_filename (dir):
+ dir = os.path.abspath (dir)
+
+ filename = dir + os.sep + 'index.html'
+ if os.path.exists (filename):
+ return filename
+
+ filename = dir + os.sep + 'book1.html'
+ if os.path.exists (filename):
+ return filename
+ else:
+ raise SystemExit, "Can't find an index file"
+
+def output_xml (fd, filename, bookname, version):
+ global base_dict
+
+ templ = \
+ '<book title="%(title)s"\n' \
+ ' name="%(name)s"\n%(version)s' \
+ ' base="%(base)s"\n' \
+ ' link="%(link)s">\n\n'
+
+
+ link = os.path.basename (filename)
+
+ input_fd = open (filename)
+ p = BookParser ()
+ p.feed (input_fd.read ())
+ p.close ()
+
+ fd.write ('<?xml version="1.0"?>\n')
+
+ full = book_full_name (bookname, version)
+ if base_dict.has_key (full):
+ base = base_dict[full]
+ else:
+ base = ""
+
+ if version:
+ version = ' version="%s"\n' % version
+
+ fd.write (templ % { 'title' : p.dict['name'],
+ 'name' : bookname,
+ 'version' : version,
+ 'base' : base,
+ 'link' : link })
+
+ fd.write ('<chapters>\n')
+ output_chapter (fd, p.dict)
+ fd.write ('</chapters>\n\n')
+
+ fd.flush ()
+
+def output_functions (fd, dirname):
+ functions = get_functions (dirname)
+ functions.sort ()
+
+ fd.write ('<functions>\n')
+ for name, link in functions:
+ fd.write (' <function name="%s" link="%s"/>\n' % (name, link))
+ fd.write ('</functions>\n\n')
+
+def dirname_to_bookname (dirname):
+ dirname = os.path.basename (dirname)
+ if string.find (dirname, '-') == -1:
+ return (dirname, "")
+ else:
+ pos = string.rfind (dirname, '-')
+ return (dirname[:pos], dirname[pos+1:])
+
+def book_full_name (book, version):
+ if version:
+ return "%s-%s" % (book, version)
+ else:
+ return book
+
+if __name__ == "__main__":
+ if len (sys.argv) != 2 and \
+ len (sys.argv) != 3:
+ print 'Usage: %s dirname [output]' % sys.argv[0]
+ raise SystemExit
+
+ index_filename = get_index_filename (sys.argv[1])
+ dirname = os.path.abspath (sys.argv[1])
+ book, version = dirname_to_bookname (sys.argv[1])
+ if len (sys.argv) != 2:
+ if sys.argv[2] == "-s":
+ fd = sys.stdout
+ else:
+ output_filename = sys.argv[2]
+ fd = open (output_filename, 'w')
+ else:
+ output_filename = book_full_name (book, version) + '.devhelp'
+ fd = open (output_filename, 'w')
+
+ output_xml (fd, index_filename, book, version)
+ output_functions (fd, dirname)
+ fd.write ('</book>\n')
+ fd.close ()
+
+