bin/get-bugzilla-attachments-by-mimetype

   1 #!/usr/bin/env python
   2 # Version: MPL 1.1 / GPLv3+ / LGPLv3+
   3 #
   4 # The contents of this file are subject to the Mozilla Public License Version
   5 # 1.1 (the "License"); you may not use this file except in compliance with
   6 # the License or as specified alternatively below. You may obtain a copy of
   7 # the License at http://www.mozilla.org/MPL/
   8 #
   9 # Software distributed under the License is distributed on an "AS IS" basis,
  10 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11 # for the specific language governing rights and limitations under the
  12 # License.
  13 #
  14 # Major Contributor(s):
  15 # Copyright (C) 2011 Red Hat, Inc., Caolán McNamara <caolanm@redhat.com>
  16 #  (initial developer)
  17 #
  18 # All Rights Reserved.
  19 #
  20 # For minor contributions see the git repository.
  21 #
  22 # Alternatively, the contents of this file may be used under the terms of
  23 # either the GNU General Public License Version 3 or later (the "GPLv3+"), or
  24 # the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
  25 # in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
  26 # instead of those above.
  27
#This digs through a pile of Bugzilla instances and populates the cwd with a
#big collection of bug-docs in per-filetype dirs, with bug-ids as names and
#prefixes to indicate which bug-tracker, e.g.
  31 #
  32 #fdo-bugid-X.suffix
  33 #rhbz-bugid-X.suffix
  34 #moz-bugid-X.suffix
  35 #
  36 #where X is the n'th attachment of that type in the bug
  37
  38 import urllib
  39 import feedparser
  40 import base64
  41 import os, os.path
  42 import xmlrpclib
  43 from xml.dom import minidom
  44 from xml.sax.saxutils import escape
  45
  46 def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
  47     id = url.rsplit('=', 2)[1]
  48     print "id is", prefix, id, suffix
  49     if os.path.isfile(suffix + '/' + prefix + id + '-1.' + suffix):
  50         print "assuming", id, "is up to date"
  51     else:
  52         print "parsing", id
  53         sock = urllib.urlopen(url+"&ctype=xml")
  54         dom = minidom.parse(sock)
  55         sock.close()
  56         attachmentid=1
  57         for attachment in dom.getElementsByTagName('attachment'):
  58             print " mimetype is",
  59             for node in attachment.childNodes:
  60                 if node.nodeName == 'type':
  61                     print node.firstChild.nodeValue,
  62                     if node.firstChild.nodeValue.lower() != mimetype.lower():
  63                         print 'skipping'
  64                         break
  65                 elif node.nodeName == 'data':
  66                     download = suffix + '/' +prefix + id + '-' + str(attachmentid) + '.' + suffix
  67                     print 'downloading as', download
  68                     f = open(download, 'w')
  69                     f.write(base64.b64decode(node.firstChild.nodeValue))
  70                     f.close()
  71                     attachmentid += 1
  72                     break
  73
  74 def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
  75     try:
  76         proxy = xmlrpclib.ServerProxy(rpcurl)
  77         query = dict()
  78         query['column_list']='bug_id'
  79         query['query_format']='advanced'
  80         query['field0-0-0']='attachments.mimetype'
  81         query['type0-0-0']='equals'
  82         query['value0-0-0']=mimetype
  83         result = proxy.Bug.search(query)
  84         bugs = result['bugs']
  85         print len(bugs), 'bugs to process'
  86         for bug in bugs:
  87             url = showurl + str(bug['bug_id'])
  88             get_from_bug_url_via_xml(url, mimetype, prefix, suffix)
  89     except xmlrpclib.Fault, err:
  90             print "A fault occurred"
  91         print "Fault code: %s" % err.faultCode
  92         print err.faultString
  93
  94 def get_through_rss_query_url(url, mimetype, prefix, suffix):
  95     try:
  96         os.mkdir(suffix)
  97     except:
  98         pass
  99     d = feedparser.parse(url)
 100     for entry in d['entries']:
 101         get_from_bug_url_via_xml(entry['id'], mimetype, prefix, suffix)
 102
 103 def get_through_rss_query(queryurl, mimetype, prefix, suffix):
 104     url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + escape(mimetype) + '&ctype=rss'
 105     print 'url is', url
 106     get_through_rss_query_url(url, mimetype, prefix, suffix)
 107
 108
 109 freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
 110 openoffice = 'http://openoffice.org/bugzilla/buglist.cgi'
 111 redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
 112 redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
 113 novell = 'https://bugzilla.novell.com/buglist.cgi'
 114 mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'
 115
 116 get_through_rss_query(freedesktop, 'application/msword', "fdo", "doc")
 117 get_through_rss_query(freedesktop, 'application/rtf', "fdo", "rtf")
 118 get_through_rss_query(freedesktop, 'text/rtf', "fdo", "rtf")
 119 get_through_rss_query(freedesktop, 'text/spreadsheet', "fdo", "slk")
 120 get_through_rss_query(freedesktop, 'application/vnd.ms-powerpoint', "fdo", "ppt")
 121
 122 get_through_rpc_query(redhatrpc, redhatbug, 'application/msword', "rhbz", "doc")
 123 get_through_rpc_query(redhatrpc, redhatbug, 'application/rtf', "rhbz", "rtf")
 124 get_through_rpc_query(redhatrpc, redhatbug, 'text/rtf', "rhbz", "rtf")
 125 get_through_rpc_query(redhatrpc, redhatbug, 'text/spreadsheet', "rhbz", "slk")
 126 get_through_rpc_query(redhatrpc, redhatbug, 'application/vnd.ms-powerpoint', "rhbz", "ppt")
 127
 128 #to-do, get attachments some other way, not inline in xml
 129 #get_through_rss_query(novell, 'application/msword', "n", "doc")
 130
 131 get_through_rss_query(openoffice, 'application/msword', "ooo", "doc")
 132 get_through_rss_query(openoffice, 'application/rtf', "ooo", "rtf")
 133 get_through_rss_query(openoffice, 'text/rtf', "ooo", "rtf")
 134 get_through_rss_query(openoffice, 'text/spreadsheet', "ooo", "slk")
 135 get_through_rss_query(openoffice, 'application/vnd.ms-powerpoint', "ooo", "ppt")
 136
 137 # vim:set shiftwidth=4 softtabstop=4 expandtab: