6e18e1f54b
Change-Id: I113f7bf3fd4194011efe83b1776ca42ad489f652
377 lines
14 KiB
Python
Executable file
377 lines
14 KiB
Python
Executable file
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# Version: MPL 1.1 / GPLv3+ / LGPLv3+
|
|
#
|
|
# The contents of this file are subject to the Mozilla Public License Version
|
|
# 1.1 (the "License"); you may not use this file except in compliance with
|
|
# the License or as specified alternatively below. You may obtain a copy of
|
|
# the License at http://www.mozilla.org/MPL/
|
|
#
|
|
# Software distributed under the License is distributed on an "AS IS" basis,
|
|
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
# for the specific language governing rights and limitations under the
|
|
# License.
|
|
#
|
|
# Major Contributor(s):
|
|
# Copyright (C) 2011 Red Hat, Inc., Caolán McNamara <caolanm@redhat.com>
|
|
# (initial developer)
|
|
#
|
|
# All Rights Reserved.
|
|
#
|
|
# For minor contributions see the git repository.
|
|
#
|
|
# Alternatively, the contents of this file may be used under the terms of
|
|
# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
|
|
# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
|
|
# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
|
|
# instead of those above.
|
|
|
|
#This digs through a pile of Bugzilla instances and populates the cwd with a big
|
|
#collection of bug-docs in per-filetype dirs with bug-ids as names with
|
|
#prefixes to indicate which bug-tracker, e.g.
|
|
#
|
|
#fdo-bugid-X.suffix
|
|
#rhbz-bugid-X.suffix
|
|
#moz-bugid-X.suffix
|
|
#
|
|
#where X is the n'th attachment of that type in the bug
|
|
|
|
import urllib
|
|
import feedparser
|
|
import base64
|
|
import re
|
|
import os, os.path
|
|
import sys
|
|
import xmlrpclib
|
|
from xml.dom import minidom
|
|
from xml.sax.saxutils import escape
|
|
|
|
def urlopen_retry(url):
|
|
maxretries = 3
|
|
for i in range(maxretries + 1):
|
|
try:
|
|
return urllib.urlopen(url)
|
|
except IOError as e:
|
|
print "caught IOError: ", e
|
|
if maxretries == i:
|
|
raise
|
|
print "retrying..."
|
|
|
|
def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
|
|
id = url.rsplit('=', 2)[1]
|
|
print "id is", prefix, id, suffix
|
|
if os.path.isfile(suffix + '/' + prefix + id + '-1.' + suffix):
|
|
print "assuming", id, "is up to date"
|
|
else:
|
|
print "parsing", id
|
|
sock = urlopen_retry(url+"&ctype=xml")
|
|
dom = minidom.parse(sock)
|
|
sock.close()
|
|
attachmentid=0
|
|
for attachment in dom.getElementsByTagName('attachment'):
|
|
attachmentid += 1
|
|
print " mimetype is",
|
|
for node in attachment.childNodes:
|
|
if node.nodeName == 'type':
|
|
print node.firstChild.nodeValue,
|
|
if node.firstChild.nodeValue.lower() != mimetype.lower():
|
|
print 'skipping'
|
|
break
|
|
elif node.nodeName == 'data':
|
|
# check if attachment is deleted (i.e. https://bugs.kde.org/show_bug.cgi?id=53343&ctype=xml)
|
|
if not node.firstChild:
|
|
print 'deleted attachment, skipping'
|
|
continue
|
|
|
|
download = suffix + '/' +prefix + id + '-' + str(attachmentid) + '.' + suffix
|
|
print 'downloading as', download
|
|
f = open(download, 'w')
|
|
f.write(base64.b64decode(node.firstChild.nodeValue))
|
|
f.close()
|
|
break
|
|
|
|
def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
|
|
id = url.rsplit('=', 2)[1]
|
|
print "id is", prefix, id, suffix
|
|
if os.path.isfile(suffix + '/' + prefix + id + '-1.' + suffix):
|
|
print "assuming", id, "is up to date"
|
|
else:
|
|
print "parsing", id
|
|
sock = urlopen_retry(url+"&ctype=xml")
|
|
dom = minidom.parse(sock)
|
|
sock.close()
|
|
attachmentid=0
|
|
for comment in dom.getElementsByTagName('thetext'):
|
|
commentText = comment.firstChild.nodeValue
|
|
match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
|
|
if not match:
|
|
continue
|
|
|
|
attachmentid += 1
|
|
|
|
realAttachmentId = match.group(1)
|
|
handle = urlopen_retry(novellattach + realAttachmentId)
|
|
if not handle:
|
|
print "attachment %s is not accessible", realAttachmentId
|
|
continue
|
|
print " mimetype is",
|
|
|
|
remoteMime = handle.info().gettype()
|
|
print remoteMime,
|
|
if remoteMime != mimetype:
|
|
print "skipping"
|
|
continue
|
|
|
|
download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
|
|
print 'downloading as', download
|
|
f = open(download, 'w')
|
|
f.write(handle.read())
|
|
f.close()
|
|
|
|
def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
|
|
try:
|
|
proxy = xmlrpclib.ServerProxy(rpcurl)
|
|
query = dict()
|
|
query['column_list']='bug_id'
|
|
query['query_format']='advanced'
|
|
query['field0-0-0']='attachments.mimetype'
|
|
query['type0-0-0']='equals'
|
|
query['value0-0-0']=mimetype
|
|
result = proxy.Bug.search(query)
|
|
bugs = result['bugs']
|
|
print len(bugs), 'bugs to process'
|
|
for bug in bugs:
|
|
url = showurl + str(bug['id'])
|
|
get_from_bug_url_via_xml(url, mimetype, prefix, suffix)
|
|
except xmlrpclib.Fault, err:
|
|
print "A fault occurred"
|
|
print "Fault code: %s" % err.faultCode
|
|
print err.faultString
|
|
|
|
def get_through_rss_query_url(url, mimetype, prefix, suffix):
|
|
try:
|
|
os.mkdir(suffix)
|
|
except:
|
|
pass
|
|
d = feedparser.parse(url)
|
|
|
|
#Getting detailed bug information and downloading an attachment body is not possible without logging in to Novell bugzilla
|
|
#get_novell_bug_via_xml function is a workaround for that situation
|
|
get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml
|
|
|
|
for entry in d['entries']:
|
|
try:
|
|
get_bug_function(entry['id'], mimetype, prefix, suffix)
|
|
except:
|
|
print entry['id'], "failed:", sys.exc_info()[0]
|
|
pass
|
|
|
|
def get_through_rss_query(queryurl, mimetype, prefix, suffix):
|
|
url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + escape(mimetype) + '&ctype=rss'
|
|
print 'url is', url
|
|
get_through_rss_query_url(url, mimetype, prefix, suffix)
|
|
|
|
def get_launchpad_bugs(prefix):
|
|
#launchpadlib python module is required to download launchpad attachments
|
|
from launchpadlib.launchpad import Launchpad
|
|
|
|
launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
|
|
ubuntu = launchpad.distributions["ubuntu"]
|
|
|
|
#since searching bugs having attachments with specific mimetypes is not available in launchpad API
|
|
#we're iterating over all bugs of the libreoffice source package
|
|
libo = ubuntu.getSourcePackage(name="libreoffice")
|
|
libobugs = libo.getBugTasks()
|
|
|
|
for bugtask in libobugs:
|
|
bug = bugtask.bug
|
|
id = str(bug.id)
|
|
print "parsing ", id, "status:", bugtask.status, "title:", bug.title[:50]
|
|
attachmentid = 0
|
|
for attachment in bug.attachments:
|
|
attachmentid += 1
|
|
handle = attachment.data.open()
|
|
if not handle.content_type in mimetypes:
|
|
#print "skipping"
|
|
continue
|
|
|
|
suffix = mimetypes[handle.content_type]
|
|
if not os.path.isdir(suffix):
|
|
try:
|
|
os.mkdir(suffix)
|
|
except:
|
|
pass
|
|
|
|
download = suffix + '/' + prefix + id + '-' + str(attachmentid) + '.' + suffix
|
|
|
|
if os.path.isfile(download):
|
|
print "assuming", id, "is up to date"
|
|
break
|
|
|
|
print 'mimetype is', handle.content_type, 'downloading as', download
|
|
|
|
f = open(download, "w")
|
|
f.write(handle.read())
|
|
f.close()
|
|
|
|
# Bugzilla instances that are searched through their RSS bug list
freedesktop = "http://bugs.freedesktop.org/buglist.cgi"
openoffice = "https://issues.apache.org/ooo/buglist.cgi"
mozilla = "https://bugzilla.mozilla.org/buglist.cgi"
abisource = "http://bugzilla.abisource.com/buglist.cgi"  # added for abiword
gnome = "http://bugzilla.gnome.org/buglist.cgi"  # added for gnumeric
kde = "http://bugs.kde.org/buglist.cgi"  # added for koffice/calligra

# Red Hat Bugzilla: XML-RPC endpoint for searching, and the show_bug URL
# that a bug id is appended to
redhatrpc = "https://bugzilla.redhat.com/xmlrpc.cgi"
redhatbug = "https://bugzilla.redhat.com/show_bug.cgi?id="

# Novell Bugzilla only shows bug details such as attachment bodies to users
# who are logged in.  As a dirty workaround, the comments containing
# "Created an attachment (id=xxxxxx)" are parsed and the attachments are
# downloaded manually.  python-bugzilla claims to support the Novell
# bugzilla login, but it is not working right now and the Novell bugzilla
# login system is a nightmare.
novell = "https://bugzilla.novell.com/buglist.cgi"
novellattach = "https://bugzilla.novell.com/attachment.cgi?id="
|
|
|
|
# Maps the mimetype of an attachment to the suffix its downloads get.  The
# suffix is used both as the file extension and as the name of the
# directory the files are stored in.
mimetypes = {
# ODF
    'application/vnd.oasis.opendocument.base': 'odb',
    'application/vnd.oasis.opendocument.database': 'odb',
    'application/vnd.oasis.opendocument.chart': 'odc',
    'application/vnd.oasis.opendocument.chart-template': 'otc',
    'application/vnd.oasis.opendocument.formula': 'odf',
    'application/vnd.oasis.opendocument.formula-template': 'otf',
    'application/vnd.oasis.opendocument.graphics': 'odg',
    'application/vnd.oasis.opendocument.graphics-template': 'otg',
    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
    'application/vnd.oasis.opendocument.presentation': 'odp',
    'application/vnd.oasis.opendocument.presentation-template': 'otp',
    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
    'application/vnd.oasis.opendocument.text': 'odt',
    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
    'application/vnd.oasis.opendocument.text-master': 'odm',
    'application/vnd.oasis.opendocument.text-template': 'ott',
    'application/vnd.oasis.opendocument.text-web': 'oth',
# OOo XML
    'application/vnd.sun.xml.base': 'odb',
    'application/vnd.sun.xml.calc': 'sxc',
    'application/vnd.sun.xml.calc.template': 'stc',
    'application/vnd.sun.xml.chart': 'sxs',
    'application/vnd.sun.xml.draw': 'sxd',
    'application/vnd.sun.xml.draw.template': 'std',
    'application/vnd.sun.xml.impress': 'sxi',
    'application/vnd.sun.xml.impress.template': 'sti',
    'application/vnd.sun.xml.math': 'sxm',
    'application/vnd.sun.xml.writer': 'sxw',
    'application/vnd.sun.xml.writer.global': 'sxg',
    'application/vnd.sun.xml.writer.template': 'stw',
    'application/vnd.sun.xml.writer.web': 'stw',
# MSO
    'application/rtf': 'rtf',
    'text/rtf': 'rtf',
    'application/msword': 'doc',
    'application/vnd.ms-powerpoint': 'ppt',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    # PowerPoint templates use the suffix potx
    'application/vnd.openxmlformats-officedocument.presentationml.template': 'potx',
    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
    'application/vnd.visio': 'vsd',
    'application/vnd.visio.xml': 'vdx',
# W3C
    'application/xhtml+xml': 'xhtml',
    'application/mathml+xml': 'mml',
    'text/html': 'html',
    'application/docbook+xml': 'docbook',
# misc
    'text/spreadsheet': 'slk',
    'application/vnd.corel-draw': 'cdr',
    'application/vnd.lotus-wordpro': 'lwp',
    'application/vnd.lotus-1-2-3': 'wks',
    'application/vnd.wordperfect': 'wpd',
    'application/vnd.ms-works': 'wps',
    'application/x-hwp': 'hwp',
    'application/x-aportisdoc': 'pdb',
    'application/x-pocket-word': 'psw',
    'application/x-t602': '602',
# binfilter
    'application/x-starcalc': 'sdc',
    'application/vnd.stardivision.calc': 'sdc5',
    'application/x-starchart': 'sds',
    'application/vnd.stardivision.chart': 'sds5',
    'application/x-stardraw': 'sdd_d',
    'application/vnd.stardivision.draw': 'sda5',
    'application/x-starimpress': 'sdd_i',
    'application/vnd.stardivision.impress': 'sdd5',
    'application/vnd.stardivision.impress-packed': 'sdp5',
    'application/x-starmath': 'smf',
    'application/vnd.stardivision.math': 'smf5',
    'application/x-starwriter': 'sdw',
    'application/vnd.stardivision.writer': 'sdw5',
    'application/vnd.stardivision.writer-global': 'sgl5',
# relatively uncommon image mimetypes
    'image/cgm': 'cgm',
    'image/tiff': 'tiff',
    'image/vnd.dxf': 'dxf',
    'image/x-emf': 'emf',
    'image/x-targa': 'tga',
    'image/x-sgf': 'sgf',
    'image/x-svm': 'svm',
    'image/x-wmf': 'wmf',
    'image/x-pict': 'pict',
}
|
|
|
|
# (mimetype, suffix) pairs of common formats outside the core document
# types.  Disabled for now: this would download gigs of pngs/jpegs...
common_noncore_mimetypes = [
    # graphics
    ("image/svg+xml", "svg"),
    ("image/x-MS-bmp", "bmp"),
    ("image/x-wpg", "wpg"),
    ("image/x-eps", "eps"),
    ("image/x-met", "met"),
    ("image/x-portable-bitmap", "pbm"),
    ("image/x-photo-cd", "pcd"),
    ("image/x-pcx", "pcx"),
    ("image/x-portable-graymap", "pgm"),
    ("image/x-portable-pixmap", "ppm"),
    ("image/vnd.adobe.photoshop", "psd"),
    ("image/x-cmu-raster", "ras"),
    ("image/x-xbitmap", "xbm"),
    ("image/x-xpixmap", "xpm"),
    ("image/gif", "gif"),
    ("image/jpeg", "jpeg"),
    ("image/png", "png"),
    # pdf, etc.
    ("application/pdf", "pdf"),
]
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rss_query(freedesktop, mimetype, "fdo", extension)
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rss_query(openoffice, mimetype, "ooo", extension)
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rss_query(novell, mimetype, "novell", extension)
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rss_query(gnome, mimetype, "gnome", extension)
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rss_query(abisource, mimetype, "abi", extension)
|
|
|
|
for (mimetype,extension) in mimetypes.items():
|
|
get_through_rss_query(kde, mimetype, "kde", extension)
|
|
|
|
try:
|
|
get_launchpad_bugs("lp")
|
|
except ImportError:
|
|
print "launchpadlib unavailable, skipping Ubuntu tracker"
|
|
|
|
# vim:set shiftwidth=4 softtabstop=4 expandtab:
|