mkdb: don't output acronym tags for unknown stability levels
[gtk-doc.git] / bugzilla.py
blob583a628e41ae9da834c1933ed01c1e25286a463b
1 #!/usr/bin/env python
2 # -*- Mode: Python -*-
3 # vi:si:et:sw=4:sts=4:ts=4
6 # parse HTML from bugzilla.gnome.org to create a list of bugs for a given
7 # product, component and target_milestone
9 import re
10 import os
11 import sys
12 import codecs
13 import urllib
14 import HTMLParser
16 # a sample bug line we parse for future reference:
17 #<TR VALIGN=TOP ALIGN=LEFT CLASS="Nor" ><TD><A HREF="show_bug.cgi?id=78267">78267</A> <td class=severity><nobr>min</nobr></td><td class=priority><nobr>Nor</nobr></td><td class=owner><nobr>thomas@apestaart.org</nobr></td><td class=status><nobr>RESO</nobr></td><td class=resolution><nobr>FIXE</nobr></td><td class=summary>autogen.sh doesn't take --prefix and similar to configure</td></TR>
19 # a sample bug section after olav's updating of bugzilla
20 # <td class="first-child">
21 # <a href="show_bug.cgi?id=147641">147641</a>
22 # <span style="display: none"></span>
23 # </td>
25 # <td style="white-space: nowrap">nor
26 # </td>
27 # <td style="white-space: nowrap">Nor
28 # </td>
29 # <td style="white-space: nowrap">Linu
30 # </td>
31 # <td style="white-space: nowrap">GStreamer
32 # </td>
33 # <td style="white-space: nowrap">RESO
34 # </td>
35 # <td style="white-space: nowrap">FIXE
36 # </td>
37 # <td >[docs] pydoc segfaults when viewing gst package doc
38 # </td>
40 # </tr>
# Query URL template; filled in with (product, component, target_milestone).
URL = 'http://bugzilla.gnome.org/buglist.cgi?product=%s&component=%s&target_milestone=%s'

# reg = re.compile('<TR.*id=(\d+)".*summary>(.*)<\/td')

# Pieces of the generated report: a heading, one line per bug (formatted
# with the bug id and its summary) and a trailing heading.
HEADER = ' Changes\n'
ITEM = ' o %s : %s'
FOOTER = '\n Contributors\n'

# Product that is queried when none is given on the command line.
default_product = "gtk-doc"

# 1-based positions, counted per <tr>, of the <td> cells that hold the
# bug id and the bug summary.
TD_ID = 1
TD_SUMMARY = 7
# after Olav's changes, it's now number 8
#TD_SUMMARY = 8
58 # Horrible, don't look here
class HP(HTMLParser.HTMLParser):
    """Collect (bug id, summary) pairs from a Bugzilla bug-list HTML page.

    The parser counts the <td> cells opened inside every <tr> row.  Text
    found in cell number ``td_id`` is read as the bug number, text found
    in cell number ``td_summary`` is accumulated as the bug summary.  When
    the row ends and a non-zero bug number was seen, the pair is appended
    to ``self.bugs``.

    td_id      -- 1-based index of the cell holding the bug id;
                  defaults to the module-level TD_ID.
    td_summary -- 1-based index of the cell holding the summary;
                  defaults to the module-level TD_SUMMARY.
    """

    def __init__(self, td_id=None, td_summary=None):
        HTMLParser.HTMLParser.__init__(self)
        # The module constants are only consulted when the caller does not
        # override the column layout.
        if td_id is None:
            td_id = TD_ID
        if td_summary is None:
            td_summary = TD_SUMMARY
        self.td_id = td_id
        self.td_summary = td_summary
        self.tr = 0        # non-zero while inside a <tr>
        self.td = 0        # number of <td> cells opened in the current row
        self.bugs = []     # collected (bugno, descr) tuples
        self.bugno = 0     # bug id seen in the current row, 0 if none
        self.descr = ""    # summary text gathered for the current row

    def handle_starttag(self, tag, data):
        if tag == 'tr':
            self.tr = 1
        elif self.tr and tag.startswith('td'):
            # count td's
            self.td += 1

    # all &gt; refs are handled through this method; they are only kept
    # when they occur inside the summary cell, so that entities from other
    # columns (or from outside any row) cannot leak into the summary
    def handle_entityref(self, name):
        if self.tr and self.td == self.td_summary:
            self.descr += " &%s; " % name

    # can be called more than once for one td
    def handle_data(self, data):
        if not self.tr:
            return
        data = data.strip()
        if not data:
            return

        # check what td it is in
        if self.td == self.td_id:
            try:
                self.bugno = int(data)
            except ValueError:
                # not a number (e.g. a header cell): this row has no bug
                self.bugno = 0
        elif self.td == self.td_summary:
            # the summary td
            self.descr += data

    def handle_endtag(self, tag):
        if tag != 'tr':
            return
        if self.bugno != 0:
            self.bugs.append((self.bugno, self.descr))
        # Reset the per-row state unconditionally, so that text collected
        # in a row without a bug id never ends up in the next bug's summary.
        self.tr = 0
        self.td = 0
        self.bugno = 0
        self.descr = ""
def _fetch_bugs(product, component, milestone):
    # Download the bug list page and return the parsed (id, summary) pairs.
    # Every value is quoted so that spaces or '&' cannot corrupt the query.
    url = URL % (urllib.quote(product),
                 urllib.quote(component),
                 urllib.quote(milestone))
    fd = urllib.urlopen(url)
    try:
        page = fd.read()
    finally:
        fd.close()

    hp = HP()
    hp.feed(page)
    return hp.bugs


def _format_bug_list(bugs):
    # Render the (bug id, summary) pairs as the text block of the report.
    lines = ["\n", HEADER]
    for bug_id, summary in bugs:
        lines.append(ITEM % (bug_id, summary))
    lines.append(FOOTER)
    return "\n".join(lines)


def _replace_bugs_section(output, text):
    # Replace the <bugs>...</bugs> section of file `output` with `text`,
    # keeping the original as `output`.bugs.bak.  Returns False (and leaves
    # the file untouched) when the file has no such section.
    source = open(output, "r")
    try:
        doc = source.read()
    finally:
        source.close()

    matcher = re.compile('(.*)<bugs>.*</bugs>(.*)', re.DOTALL)
    match = matcher.search(doc)
    if match is None:
        return False
    pre = match.group(1)
    post = match.group(2)

    # Move the original out of the way first, so it survives a failed write.
    backup = output + ".bugs.bak"
    os.rename(output, backup)
    handle = open(output, "w")
    try:
        handle.write(pre + text + post)
    finally:
        handle.close()
    return True


def main(args):
    """Fetch the bugs of a component/milestone and print or splice them.

    args is an argv-style list:
    [program, component, milestone, [product, [file]]].

    Without a file argument the formatted bug list is printed.  With a
    file argument, the <bugs>...</bugs> section of that file (markers
    included) is replaced by the bug list and the previous content is
    kept in "<file>.bugs.bak".

    Returns 2 when too few arguments are given, 1 when the file has no
    <bugs>...</bugs> section, and None on success.
    """
    if len(args) < 3:
        # Tolerate an empty list instead of failing on args[0].
        if args:
            prog = args[0]
        else:
            prog = 'bugzilla.py'
        print('Usage: %s component milestone [product] [file]' % prog)
        return 2

    component = args[1]
    milestone = args[2]

    if len(args) > 3:
        product = args[3]
    else:
        product = default_product

    if len(args) > 4:
        output = args[4]
    else:
        output = None

    bugs = _format_bug_list(_fetch_bugs(product, component, milestone))

    if not output:
        print(bugs)
    elif not _replace_bugs_section(output, bugs):
        sys.stderr.write("%s: no <bugs>...</bugs> section found\n" % output)
        return 1
# Script entry point: hand the command line to main() and exit with the
# status it returns.
if __name__ == '__main__':
    status = main(sys.argv)
    sys.exit(status)