#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

#This digs through a pile of bugzilla's and populates the cwd with a big
#collection of bug-docs in per-filetype dirs with bug-ids as names with
#prefixes to indicate which bug-tracker, e.g.
#
#fdo-bugid-X.suffix
#rhbz-bugid-X.suffix
#moz-bugid-X.suffix
#
#where X is the n'th attachment of that type in the bug

from __future__ import print_function
import feedparser
import base64
import re
import os, os.path
import sys
try:
    from urllib.request import urlopen
except ImportError:
    # Python 2 fallback
    from urllib import urlopen
try:
    import xmlrpc.client as xmlrpclib
except ImportError:
    # Python 2 fallback
    import xmlrpclib
from xml.dom import minidom
from xml.sax.saxutils import escape


def _ensure_dir(path):
    """Create directory *path*, ignoring the failure if it cannot be made.

    The usual reason for failure is that the directory already exists from
    an earlier run. Any other problem will surface when a file is opened
    inside the directory, so it is deliberately not raised here.
    """
    try:
        os.mkdir(path)
    except OSError:
        pass


def urlopen_retry(url):
    """Open *url* and return the handle, retrying after an IOError.

    Up to three retries are made (four attempts in total). The IOError of
    the final attempt is re-raised to the caller.
    """
    maxretries = 3
    for i in range(maxretries + 1):
        try:
            return urlopen(url)
        except IOError as e:
            print("caught IOError: " + str(e))
            if maxretries == i:
                raise
            print("retrying...")


def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one bug that match *mimetype*.

    The bug is fetched as XML from ``url + "&ctype=xml"``; attachment
    bodies are expected inline as base64 in ``<data>`` elements. Each
    matching attachment is written to ``suffix/<prefix><bugid>-<n>.<suffix>``
    where n is the 1-based position of the attachment in the bug.

    If ``suffix/<prefix><bugid>-1.<suffix>`` already exists the bug is
    assumed to have been processed before and nothing is fetched.
    """
    bug_id = url.rsplit('=', 2)[1]
    print("id is " + prefix + bug_id + " " + suffix)
    if os.path.isfile(suffix + '/' + prefix + bug_id + '-1.' + suffix):
        print("assuming " + bug_id + " is up to date")
        return

    print("parsing " + bug_id)
    sock = urlopen_retry(url + "&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        # release the connection even when the XML is malformed
        sock.close()

    attachmentid = 0
    for attachment in dom.getElementsByTagName('attachment'):
        attachmentid += 1
        print(" mimetype is", end=' ')
        for node in attachment.childNodes:
            if node.nodeName == 'type':
                print(node.firstChild.nodeValue, end=' ')
                if node.firstChild.nodeValue.lower() != mimetype.lower():
                    print('skipping')
                    break
            elif node.nodeName == 'data':
                # check if attachment is deleted
                # (e.g. https://bugs.kde.org/show_bug.cgi?id=53343&ctype=xml)
                if not node.firstChild:
                    print('deleted attachment, skipping')
                    continue

                download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix
                print('downloading as ' + download)
                with open(download, 'wb') as f:
                    f.write(base64.b64decode(node.firstChild.nodeValue))
                break


def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one Novell bug that match *mimetype*.

    The bug XML is fetched from ``url + "&ctype=xml"`` and its comments
    (``<thetext>`` elements) are searched for the text
    "Created an attachment (id=N)". Every attachment found that way is
    requested from ``novellattach + N`` and saved to
    ``suffix/<prefix><bugid>-<n>.<suffix>`` when the Content-Type reported
    by the server equals *mimetype* (compared case-insensitively).

    Files that already exist on disk are not downloaded again.
    """
    bug_id = url.rsplit('=', 2)[1]
    print("id is " + prefix + bug_id + " " + suffix)
    if os.path.isfile(suffix + '/' + prefix + bug_id + '-1.' + suffix):
        print("assuming " + bug_id + " is up to date")
        return

    print("parsing " + bug_id)
    sock = urlopen_retry(url + "&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        sock.close()

    attachmentid = 0
    for comment in dom.getElementsByTagName('thetext'):
        # an empty comment has no text node at all
        if comment.firstChild is None:
            continue
        commentText = comment.firstChild.nodeValue
        match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
        if not match:
            continue

        attachmentid += 1

        download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix
        if os.path.isfile(download):
            print("assuming " + download + " is up to date")
            continue

        realAttachmentId = match.group(1)
        handle = urlopen_retry(novellattach + realAttachmentId)
        if not handle:
            print("attachment %s is not accessible" % realAttachmentId)
            continue

        try:
            print(" mimetype is", end=' ')

            info = handle.info()
            # Python 3 message objects offer get_content_type(); the
            # Python 2 ones only have gettype(). Probe with hasattr since
            # touching a missing attribute raises AttributeError.
            if hasattr(info, 'get_content_type'):
                remoteMime = info.get_content_type()
            else:
                remoteMime = info.gettype()
            print(remoteMime, end=' ')
            # The header parsers hand back a lower-cased type while some
            # keys of the mimetypes table are mixed case (macroEnabled),
            # so compare without regard to case.
            if remoteMime.lower() != mimetype.lower():
                print("skipping")
                continue

            print('downloading as ' + download)
            with open(download, 'wb') as f:
                f.write(handle.read())
        finally:
            handle.close()


def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
    """Fetch all attachments of type *mimetype* from an XML-RPC bugzilla.

    ``Bug.search`` is called on the server at *rpcurl* to list the bugs
    that have an attachment of the given mimetype; each bug is then
    processed through ``get_from_bug_url_via_xml`` using
    ``showurl + <bug id>``. An XML-RPC fault is reported on stdout and
    ends the processing of this mimetype without raising.
    """
    _ensure_dir(suffix)
    try:
        proxy = xmlrpclib.ServerProxy(rpcurl)
        query = dict()
        query['column_list'] = 'bug_id'
        query['query_format'] = 'advanced'
        query['field0-0-0'] = 'attachments.mimetype'
        query['type0-0-0'] = 'equals'
        query['value0-0-0'] = mimetype
        result = proxy.Bug.search(query)
        bugs = result['bugs']
        print(str(len(bugs)) + ' bugs to process')
        for bug in bugs:
            url = showurl + str(bug['id'])
            get_from_bug_url_via_xml(url, mimetype, prefix, suffix)
    except xmlrpclib.Fault as err:
        print("A fault occurred")
        print("Fault code: %s" % err.faultCode)
        print(err.faultString)


def get_through_rss_query_url(url, mimetype, prefix, suffix):
    """Process every bug listed in the RSS feed at *url*.

    The ``id`` of each feed entry is used as the bug URL. A failure while
    handling one bug is printed and the remaining bugs are still
    processed.
    """
    _ensure_dir(suffix)
    d = feedparser.parse(url)

    #Getting detailed bug information and downloading an attachment body is not possible without logging in to Novell bugzilla
    #get_novell_bug_via_xml function is a workaround for that situation
    get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml

    for entry in d['entries']:
        try:
            get_bug_function(entry['id'], mimetype, prefix, suffix)
        except Exception:
            # KeyboardInterrupt and SystemExit do not derive from
            # Exception, so Ctrl+C still stops the script.
            print(entry['id'] + " failed: " + str(sys.exc_info()[0]))


def get_through_rss_query(queryurl, mimetype, prefix, suffix):
    """Query the bugzilla at *queryurl* for bugs with *mimetype* attachments.

    Builds an advanced-search URL that returns the matching bugs as RSS
    and hands it to ``get_through_rss_query_url``.
    """
    # A literal '+' in a query string is decoded as a space, which would
    # turn e.g. 'image/svg+xml' into 'image/svg xml', so send it as %2B.
    url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + escape(mimetype.replace("+", "%2B")) + '&ctype=rss'
    print('url is ' + url)
    get_through_rss_query_url(url, mimetype, prefix, suffix)


def get_launchpad_bugs(prefix):
    """Download interesting attachments from Ubuntu bugs on Launchpad.

    Walks the bug tasks of a fixed list of Ubuntu source packages and
    saves each attachment whose content type is a key of ``mimetypes`` to
    ``<suffix>/<prefix><bugid>-<n>.<suffix>``.

    Raises ImportError when the launchpadlib module is not installed.
    """
    #launchpadlib python module is required to download launchpad attachments
    from launchpadlib.launchpad import Launchpad

    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
    ubuntu = launchpad.distributions["ubuntu"]

    #since searching bugs having attachments with specific mimetypes is not available in launchpad API
    #we're iterating over all bugs of the most interesting source packages
    for pkg in ["libreoffice", "openoffice.org", "abiword", "gnumeric", "koffice", "calligra"]:
        srcpkg = ubuntu.getSourcePackage(name=pkg)
        pkgbugs = srcpkg.searchTasks(status=["New", "Fix Committed", "Invalid", "Won't Fix", "Confirmed", "Triaged", "In Progress", "Incomplete", "Incomplete (with response)", "Incomplete (without response)", "Fix Released", "Opinion", "Expired"])

        for bugtask in pkgbugs:
            bug = bugtask.bug
            bug_id = str(bug.id)
            print("parsing " + bug_id + " status: " + bugtask.status + " title: " + bug.title[:50])
            attachmentid = 0
            for attachment in bug.attachments:
                attachmentid += 1
                handle = attachment.data.open()
                if handle.content_type not in mimetypes:
                    #print "skipping"
                    continue

                suffix = mimetypes[handle.content_type]
                if not os.path.isdir(suffix):
                    _ensure_dir(suffix)

                download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix

                if os.path.isfile(download):
                    print("assuming " + bug_id + " is up to date")
                    break

                print('mimetype is ' + handle.content_type + ' downloading as ' + download)

                # attachments are documents and images: write them as
                # bytes, not through a text-mode file
                with open(download, "wb") as f:
                    f.write(handle.read())


freedesktop = 'http://bugs.libreoffice.org/buglist.cgi'
abisource = 'http://bugzilla.abisource.com/buglist.cgi' #added for abiword
gnome = 'http://bugzilla.gnome.org/buglist.cgi' # added for gnumeric
kde = 'http://bugs.kde.org/buglist.cgi' # added for koffice/calligra
openoffice = 'https://issues.apache.org/ooo/buglist.cgi'
redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'

#Novell Bugzilla requires users to log in in order to get details of the bugs such as attachment bodies etc.
#As a dirty workaround, we parse comments containing "Created an attachment (id=xxxxxx)" and download attachments manually
#python-bugzilla claims that it supports Novell bugzilla login but it's not working right now and novell bugzilla login
#system is a nightmare
novellattach = 'https://bugzilla.novell.com/attachment.cgi?id='
novell = 'https://bugzilla.novell.com/buglist.cgi'

# Maps an attachment mimetype to the file extension that is used both as
# the download directory name and as the suffix of the saved file.
mimetypes = {
# ODF
    'application/vnd.oasis.opendocument.base': 'odb',
    'application/vnd.oasis.opendocument.database': 'odb',
    'application/vnd.oasis.opendocument.chart': 'odc',
    'application/vnd.oasis.opendocument.chart-template': 'otc',
    'application/vnd.oasis.opendocument.formula': 'odf',
    'application/vnd.oasis.opendocument.formula-template': 'otf',
    'application/vnd.oasis.opendocument.graphics': 'odg',
    'application/vnd.oasis.opendocument.graphics-template': 'otg',
    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
    'application/vnd.oasis.opendocument.presentation': 'odp',
    'application/vnd.oasis.opendocument.presentation-template': 'otp',
    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
    'application/vnd.oasis.opendocument.text': 'odt',
    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
    'application/vnd.oasis.opendocument.text-master': 'odm',
    'application/vnd.oasis.opendocument.text-template': 'ott',
    'application/vnd.oasis.opendocument.text-web': 'oth',
# OOo XML
    'application/vnd.sun.xml.base': 'odb',
    'application/vnd.sun.xml.calc': 'sxc',
    'application/vnd.sun.xml.calc.template': 'stc',
    'application/vnd.sun.xml.chart': 'sxs',
    'application/vnd.sun.xml.draw': 'sxd',
    'application/vnd.sun.xml.draw.template': 'std',
    'application/vnd.sun.xml.impress': 'sxi',
    'application/vnd.sun.xml.impress.template': 'sti',
    'application/vnd.sun.xml.math': 'sxm',
    'application/vnd.sun.xml.writer': 'sxw',
    'application/vnd.sun.xml.writer.global': 'sxg',
    'application/vnd.sun.xml.writer.template': 'stw',
    'application/vnd.sun.xml.writer.web': 'stw',
# MSO
    'application/rtf': 'rtf',
    'text/rtf': 'rtf',
    'application/msword': 'doc',
    'application/vnd.ms-powerpoint': 'ppt',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.ms-excel.sheet.binary.macroEnabled.12': 'xlsb',
    'application/vnd.ms-excel.sheet.macroEnabled.12': 'xlsm',
    'application/vnd.ms-excel.template.macroEnabled.12': 'xltm',
    'application/vnd.ms-powerpoint.presentation.macroEnabled.12': 'pptm',
    'application/vnd.ms-powerpoint.slide.macroEnabled.12': 'sldm',
    'application/vnd.ms-powerpoint.slideshow.macroEnabled.12': 'ppsm',
    'application/vnd.ms-powerpoint.template.macroEnabled.12': 'potm',
    'application/vnd.ms-word.document.macroEnabled.12': 'docm',
    'application/vnd.ms-word.template.macroEnabled.12': 'dotm',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    'application/vnd.openxmlformats-officedocument.presentationml.template': 'potx',
    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
    'application/vnd.openxmlformats-officedocument.presentationml.slide': 'sldx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
    'application/vnd.visio': 'vsd',
    'application/vnd.visio.xml': 'vdx',
    'application/x-mspublisher': 'pub',
# W3C
    'application/xhtml+xml': 'xhtml',
    'application/mathml+xml': 'mml',
    'text/html': 'html',
    'application/docbook+xml': 'docbook',
# misc
    'text/csv': 'csv',
    'text/spreadsheet': 'slk',
    'application/vnd.corel-draw': 'cdr',
    'application/vnd.lotus-wordpro': 'lwp',
    'application/vnd.lotus-1-2-3': 'wks',
    'application/vnd.wordperfect': 'wpd',
    'application/wordperfect5.1': 'wpd',
    'application/vnd.ms-works': 'wps',
    'application/clarisworks' : 'cwk',
    'application/macwriteii' : 'mw',
    'application/vnd.apple.keynote': 'key',
    'application/x-iwork-keynote-sffkey': 'key',
    'application/x-hwp': 'hwp',
    'application/x-aportisdoc': 'pdb',
    'application/prs.plucker' : 'pdb_plucker',
    'application/vnd.palm' : 'pdb_palm',
    'application/x-sony-bbeb' : 'lrf',
    'application/x-pocket-word': 'psw',
    'application/x-t602': '602',
    'application/x-fictionbook+xml': 'fb2',
# binfilter
    'application/x-starcalc': 'sdc',
    'application/vnd.stardivision.calc': 'sdc5',
    'application/x-starchart': 'sds',
    'application/vnd.stardivision.chart': 'sds5',
    'application/x-stardraw': 'sdd_d',
    'application/vnd.stardivision.draw': 'sda5',
    'application/x-starimpress': 'sdd_i',
    'application/vnd.stardivision.impress': 'sdd5',
    'application/vnd.stardivision.impress-packed': 'sdp5',
    'application/x-starmath': 'smf',
    'application/vnd.stardivision.math': 'smf5',
    'application/x-starwriter': 'sdw',
    'application/vnd.stardivision.writer': 'sdw5',
    'application/vnd.stardivision.writer-global': 'sgl5',
# relatively uncommon image mimetypes
    'image/x-freehand': 'fh',
    'image/cgm': 'cgm',
    'image/tiff': 'tiff',
    'image/vnd.dxf': 'dxf',
    'image/x-emf': 'emf',
    'image/x-targa': 'tga',
    'image/x-sgf': 'sgf',
    'image/x-svm': 'svm',
    'image/x-wmf': 'wmf',
    'image/x-pict': 'pict',
    'image/x-cmx': 'cmx',
    'image/svg+xml': 'svg',
    'image/x-MS-bmp': 'bmp',
    'image/x-wpg': 'wpg',
    'image/x-eps': 'eps',
    'image/x-met': 'met',
    'image/x-portable-bitmap': 'pbm',
    'image/x-photo-cd': 'pcd',
    'image/x-pcx': 'pcx',
    'image/x-portable-graymap': 'pgm',
    'image/x-portable-pixmap': 'ppm',
    'image/vnd.adobe.photoshop': 'psd',
    'image/x-cmu-raster': 'ras',
    'image/x-xbitmap': 'xbm',
    'image/x-xpixmap': 'xpm',
}

# disabled for now, this would download gigs of pngs/jpegs...
common_noncore_mimetypes = {
# graphics
    'image/gif': 'gif',
    'image/jpeg': 'jpeg',
    'image/png': 'png',
# pdf, etc.
    'application/pdf': 'pdf',
}

# Work through the trackers one after another, querying each of them once
# per known mimetype.
for (mimetype,extension) in mimetypes.items():
    get_through_rss_query(freedesktop, mimetype, "fdo", extension)

for (mimetype,extension) in mimetypes.items():
    get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)

for (mimetype,extension) in mimetypes.items():
    get_through_rss_query(openoffice, mimetype, "ooo", extension)

for (mimetype,extension) in mimetypes.items():
    get_through_rss_query(novell, mimetype, "novell", extension)

for (mimetype,extension) in mimetypes.items():
    get_through_rss_query(gnome, mimetype, "gnome", extension)

for (mimetype,extension) in mimetypes.items():
    get_through_rss_query(abisource, mimetype, "abi", extension)

for (mimetype,extension) in mimetypes.items():
    get_through_rss_query(kde, mimetype, "kde", extension)

try:
    get_launchpad_bugs("lp")
except ImportError:
    print("launchpadlib unavailable, skipping Ubuntu tracker")

# vim:set shiftwidth=4 softtabstop=4 expandtab: