version bump to 0.3.0.13
[tor.git] / scripts / maint / format_changelog.py
blobe909fc550aa6120c5573adb13e903f2a72f035f9
1 #!/usr/bin/python
2 # Copyright (c) 2014-2015, The Tor Project, Inc.
3 # See LICENSE for licensing information
5 # This script reformats a section of the changelog to wrap everything to
6 # the right width and put blank lines in the right places. Eventually,
7 # it might include a linter.
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
12 import os
13 import re
14 import sys
15 import optparse
17 # ==============================
18 # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
19 # algorithm, with totally ad hoc parameters!
# We're trying to minimize:
#    UNDERFLOW_PENALTY * (ragged space ** UNDERFLOW_EXPONENT) on each
#    underflowed intermediate line,
#  PLUS
#    OVERFLOW_PENALTY * (overflowed characters ** OVERFLOW_EXPONENT) on
#    each overflowed line,
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY * (ragged space **
#    LASTLINE_UNDERFLOW_EXPONENT) on the last line,
#  PLUS
#    ORPHAN_PENALTY if the last line contains no space (a lone word),
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with "(".
#
# We use an obvious dynamic programming algorithm to sorta approximate this.
# It's not coded right or optimally, but it's fast enough for changelogs.
33 # (Code found in an old directory of mine, lightly cleaned. -NM)
# Hyphenated names that must never be broken at their hyphen when wrapping
# (consulted by hyphenatable()).
NO_HYPHENATE = set([
    "pf-divert",
    "tor-resolve",
    "tor-gencert",
])
# Tuning knobs for wrapping_quality().  A line shorter than its width costs
# PENALTY * (unused columns ** EXPONENT); the last line of a paragraph has
# its own, gentler pair.
LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY = 1, 1
UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY = 3, 1

# A line longer than its width costs PENALTY * (excess columns ** EXPONENT).
OVERFLOW_EXPONENT, OVERFLOW_PENALTY = 4, 2000

# Extra cost when the last line contains no space (a lone word).
ORPHAN_PENALTY = 10000

# Extra cost for every line that begins with "(".
OPENPAREN_PENALTY = 200
def generate_wrapping(words, divisions):
    """Return the lines obtained by breaking `words` at `divisions`.

    `divisions` is a sequence of indices into `words`; each index ends one
    line, so the first line holds words[0:divisions[0]], the next one
    words[divisions[0]:divisions[1]], and so on.

    The words of a line are joined with single spaces.  The marker
    character chr(0xFF) at the end of a fragment stands for a hyphen at a
    possible break point: a marker followed by the joining space collapses
    to "-" (gluing the fragments back together), and any marker left over
    becomes a plain "-".  Each line is stripped of surrounding whitespace.
    """
    ends = list(divisions)
    starts = [0] + ends[:-1]
    return [
        " ".join(words[begin:end])
           .replace("\xff ", "-")
           .replace("\xff", "-")
           .strip()
        for begin, end in zip(starts, ends)
    ]
def wrapping_quality(words, divisions, width1, width2):
    """Score the wrapping of `words` described by `divisions`; lower is better.

    The lines are built with generate_wrapping().  The first line is
    measured against `width1` and every other line against `width2`.

    Each line contributes:
      * OPENPAREN_PENALTY if it starts with "(";
      * OVERFLOW_PENALTY * excess ** OVERFLOW_EXPONENT if it is longer
        than its width;
      * otherwise a penalty * slack ** exponent term, using the
        LASTLINE_UNDERFLOW_* pair for the last line and the UNDERFLOW_*
        pair for all others, plus ORPHAN_PENALTY when the last line
        contains no space.

    Returns the total as a float.
    """
    total = 0.0

    lines = generate_wrapping(words, divisions)
    last_index = len(lines) - 1
    # Decide "first" and "last" by position, not by object identity: two
    # equal lines can be the very same string object, which would make an
    # `is` test treat a middle line as the first or the last one.
    for index, line in enumerate(lines):
        length = len(line)
        width = width1 if index == 0 else width2

        if line[0:1] == '(':
            total += OPENPAREN_PENALTY

        if length > width:
            total += OVERFLOW_PENALTY * (
                (length - width) ** OVERFLOW_EXPONENT)
        else:
            if index == last_index:
                e, p = (LASTLINE_UNDERFLOW_EXPONENT,
                        LASTLINE_UNDERFLOW_PENALTY)
                if " " not in line:
                    total += ORPHAN_PENALTY
            else:
                e, p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)

            total += p * ((width - length) ** e)

    return total
def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
    """Break `words` into lines and return them as a list of strings.

    `width` is the full line width; `prefix_len1` and `prefix_len2` are
    subtracted from it to get the room available on the first line and on
    every later line respectively.

    For each prefix of the word list, every best division found for a
    shorter prefix is extended in two ways -- by stretching its last line
    up to the current word, or by starting a new line that ends there --
    and the candidate with the lowest wrapping_quality() score is kept.
    """
    first_width = width - prefix_len1
    later_width = width - prefix_len2

    # best[k] is the division tuple chosen for the first k words.
    best = [(0,)]
    for end in range(1, len(words) + 1):
        winner = None
        winner_score = 1e300
        # `best` holds exactly `end` entries here, one per shorter prefix.
        for prior in best:
            for candidate in (prior[:-1] + (end,), prior + (end,)):
                score = wrapping_quality(words, candidate,
                                         first_width, later_width)
                if score < winner_score:
                    winner, winner_score = candidate, score
        best.append(winner)

    return generate_wrapping(words, best[-1])
def hyphenatable(word):
    """Return True if `word` may be broken across lines at its hyphens.

    A word qualifies when it contains no "--", does not start with a digit
    or a hyphen, has no digit before its first hyphen, and -- once leading
    and trailing non-word characters are removed -- is not listed in
    NO_HYPHENATE.
    """
    if "--" in word:
        return False

    if not re.match(r'^[^\d\-]\D*-', word):
        return False

    # Strip punctuation from BOTH ends before the lookup; the second
    # substitution must work on the already-stripped text, otherwise
    # something like "(tor-resolve)" keeps its "(" and slips past the
    # NO_HYPHENATE check.
    stripped = re.sub(r'^\W+', '', word)
    stripped = re.sub(r'\W+$', '', stripped)
    return stripped not in NO_HYPHENATE
def split_paragraph(s):
    """Split paragraph `s` into the word list used for wrapping.

    The text is split on whitespace.  A word accepted by hyphenatable() is
    further cut at every "-": each fragment except the last is emitted
    with the marker character chr(0xFF) appended in place of the hyphen,
    followed by the final fragment.  All other words are kept whole.
    """
    pieces = []
    for token in s.split():
        if not hyphenatable(token):
            pieces.append(token)
            continue
        fragments = token.split("-")
        pieces.extend(fragment + "\xff" for fragment in fragments[:-1])
        pieces.append(fragments[-1])
    return pieces
def fill(text, width, initial_indent, subsequent_indent):
    """Wrap `text` to `width` columns and return it as a single string.

    The first output line is prefixed with `initial_indent` and each later
    line with `subsequent_indent`; every line, including the last, ends
    with a newline.  The indents' lengths are passed to wrap_graf() so the
    wrapped text fits in the remaining columns.
    """
    wrapped = wrap_graf(split_paragraph(text),
                        len(initial_indent),
                        len(subsequent_indent),
                        width)
    pieces = [initial_indent + wrapped[0] + "\n"]
    for continuation in wrapped[1:]:
        pieces.append(subsequent_indent + continuation + "\n")
    return "".join(pieces)
155 # ==============================
# Line types returned by head_parser() / body_parser() and consumed by
# ChangeLog.addLine():
#   TP_MAINHEAD  (0) the "Changes in ..." heading of the release
#   TP_HEADTEXT  (1) a line of introductory text
#   TP_BLANK     (2) an empty or whitespace-only line
#   TP_SECHEAD   (3) a section heading ("o ..." line)
#   TP_ITEMFIRST (4) the first line of an item ("-" line)
#   TP_ITEMBODY  (5) a further line of an item
#   TP_END       (6) a "Changes in ..." line met after the sections began
#   TP_PREHEAD   (7) a line starting with a letter, before the sections
(TP_MAINHEAD,
 TP_HEADTEXT,
 TP_BLANK,
 TP_SECHEAD,
 TP_ITEMFIRST,
 TP_ITEMBODY,
 TP_END,
 TP_PREHEAD) = range(8)
def head_parser(line):
    """Classify `line` from the part of the changelog before any section.

    Returns the TP_* type of the first rule that matches, trying the rules
    in the order listed; a line that matches none of them is TP_HEADTEXT.
    """
    # NOTE(review): the section-heading pattern below contains exactly one
    # leading space -- confirm this matches the changelog's real indentation.
    ordered_rules = (
        (r'^Changes in', TP_MAINHEAD),
        (r'^[A-Za-z]', TP_PREHEAD),
        (r'^ o ', TP_SECHEAD),
        (r'^\s*$', TP_BLANK),
    )
    for pattern, line_type in ordered_rules:
        if re.match(pattern, line):
            return line_type
    return TP_HEADTEXT
def body_parser(line):
    """Classify `line` from the sections part of the changelog.

    Returns the TP_* type of the first rule that matches, trying the rules
    in the order listed.  A line that matches none of them is reported on
    standard output as a "Weird line" and None is returned.
    """
    # NOTE(review): the three indentation-sensitive patterns below each
    # contain a single leading space -- confirm they match the changelog's
    # real indentation.
    ordered_rules = (
        (r'^ o ', TP_SECHEAD),
        (r'^ -', TP_ITEMFIRST),
        (r'^ \S', TP_ITEMBODY),
        (r'^\s*$', TP_BLANK),
        (r'^Changes in', TP_END),
        (r'^\s+\S', TP_HEADTEXT),
    )
    for pattern, line_type in ordered_rules:
        if re.match(pattern, line):
            return line_type

    sys.stdout.write("Weird line %r" % line + "\n")
    return None
def clean_head(head):
    """Return the section heading used to group sections; currently `head` itself."""
    return head
def head_score(s):
    """Return the sort key for section heading `s`; lower sorts earlier.

    `s` must look like "<spaces>o <title>".  If it does not, a message is
    written to standard error and 99999 is returned.

    The lower-cased title picks a base score: security headings (that do
    not mention "feature") come first, then deprecated versions, then
    new/build requirements, new dependencies and removed platforms, then
    the major and minor groups, with everything else at 1000.  Small
    adjustments then order headings within a group: -2 if the title
    mentions "secur", +2 if it contains "(other)", and -1 if it has no
    parenthesis at all.
    """
    m = re.match(r'^ +o (.*)', s)
    if m is None:
        sys.stderr.write("Can't score %r" % s + "\n")
        return 99999
    title = m.group(1).lower()

    def mentions(*needles):
        # True when every needle occurs somewhere in the title.
        return all(needle in title for needle in needles)

    if title.startswith("security") and not mentions("feature"):
        base = -300
    elif title.startswith("deprecated version"):
        base = -200
    elif (mentions('new', 'requirement') or
          mentions('new', 'dependenc') or
          mentions('build', 'requirement') or
          mentions('removed', 'platform')):
        base = -100
    else:
        base = 1000
        # Longer prefixes come before the bare "major"/"minor" catch-alls.
        for prefix, value in (("major feature", 0),
                              ("major bug", 50),
                              ("major", 70),
                              ("minor feature", 200),
                              ("minor bug", 250),
                              ("minor", 270)):
            if title.startswith(prefix):
                base = value
                break

    adjustment = 0
    if 'secur' in title:
        adjustment -= 2
    if "(other)" in title:
        adjustment += 2
    if '(' not in title:
        adjustment -= 1

    return base + adjustment
class ChangeLog(object):
    """Parsed form of one release's changelog entry, with a plain-text writer.

    Lines are fed in one at a time through addLine(), each tagged with a
    TP_* line type.  dump() then writes everything to sys.stdout by
    calling the dump* hook methods, which a subclass may override to
    produce a different output format.

    Attributes:
      prehead    -- lines seen before the main heading (TP_PREHEAD).
      mainhead   -- the main heading line (TP_MAINHEAD), or None.
      headtext   -- introductory paragraphs; each is a list of lines.
      sections   -- list of [lineno, heading, items] entries.  Each item
                    is a (lineno, grafs) pair, where grafs is a list of
                    paragraphs and each paragraph is a list of lines.
      curgraf    -- the paragraph that text lines are appended to, or None
                    when the next text line must open a new paragraph.
      cursection -- the section that new items are added to, or None.
      lineno     -- number of lines passed to addLine() so far.
      wrapText, blogOrder, drupalBreak -- output options, see __init__.
    """

    def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
        """Create an empty changelog.

        wrapText    -- re-wrap paragraphs on output instead of echoing them.
        blogOrder   -- write the introductory text before the main heading
                       rather than after it.
        drupalBreak -- call dumpDrupalBreak() after the second section when
                       there are more than four sections.
        """
        self.prehead = []
        self.mainhead = None
        self.headtext = []
        self.curgraf = None
        self.sections = []
        self.cursection = None
        self.lineno = 0
        self.wrapText = wrapText
        self.blogOrder = blogOrder
        self.drupalBreak = drupalBreak

    @staticmethod
    def _println(text=""):
        """Write `text` followed by a newline to the current sys.stdout."""
        sys.stdout.write("%s\n" % (text,))

    def addLine(self, tp, line):
        """Record `line`, whose type is the TP_* constant `tp`.

        Raises AssertionError if a second main heading is seen or if `tp`
        is not a line type this method handles.
        """
        self.lineno += 1

        if tp == TP_MAINHEAD:
            # Raised explicitly (not via `assert`) so the check also
            # runs under `python -O`.
            if self.mainhead:
                raise AssertionError(
                    "second main heading on line %d: %r"
                    % (self.lineno, line))
            self.mainhead = line

        elif tp == TP_PREHEAD:
            self.prehead.append(line)

        elif tp == TP_HEADTEXT:
            if self.curgraf is None:
                self.curgraf = []
                self.headtext.append(self.curgraf)
            self.curgraf.append(line)

        elif tp == TP_BLANK:
            # A blank line closes the current paragraph.
            self.curgraf = None

        elif tp == TP_SECHEAD:
            self.cursection = [self.lineno, line, []]
            self.sections.append(self.cursection)

        elif tp == TP_ITEMFIRST:
            item = (self.lineno, [[line]])
            self.curgraf = item[1][0]
            self.cursection[2].append(item)

        elif tp == TP_ITEMBODY:
            if self.curgraf is None:
                # First line after a blank: open a new paragraph inside
                # the most recent item of the current section.
                self.curgraf = []
                self.cursection[2][-1][1].append(self.curgraf)
            self.curgraf.append(line)

        else:
            # Fail loudly rather than silently dropping the line; an
            # `assert` here would disappear under `python -O`.
            raise AssertionError(
                "unexpected line type %r on line %d: %r"
                % (tp, self.lineno, line))

    def lint_head(self, line, head):
        """Warn on stderr if section heading `head` (from line number
        `line`) does not look like "o Title (optional note):".

        Returns None.
        """
        m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
        if not m:
            sys.stderr.write("Weird header format on line %s" % line + "\n")

    def lint_item(self, line, grafs, head_type):
        """Check one item; currently does nothing."""
        pass

    def lint(self):
        """Run lint_head() on every section and lint_item() on every item."""
        self.head_lines = {}
        for sec_line, sec_head, items in self.sections:
            head_type = self.lint_head(sec_line, sec_head)
            for item_line, grafs in items:
                self.lint_item(item_line, grafs, head_type)

    def dumpGraf(self, par, indent1, indent2=-1):
        """Write paragraph `par` (a list of lines) to sys.stdout.

        When wrapText is off the lines are written unchanged.  Otherwise
        the paragraph is re-wrapped to 72 columns, indenting the first
        line by `indent1` spaces and later lines by `indent2` spaces
        (`indent1` again when `indent2` is -1).
        """
        if not self.wrapText:
            for line in par:
                self._println(line)
            return

        if indent2 == -1:
            indent2 = indent1
        # Collapse each line's internal whitespace, then join into one string.
        text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)

        sys.stdout.write(fill(text,
                              width=72,
                              initial_indent=" "*indent1,
                              subsequent_indent=" "*indent2))

    def dumpPreheader(self, graf):
        """Write the pre-heading lines, followed by a blank line."""
        self.dumpGraf(graf, 0)
        self._println()

    def dumpMainhead(self, head):
        """Write the main heading line."""
        self._println(head)

    def dumpHeadGraf(self, graf):
        """Write one introductory paragraph, followed by a blank line."""
        self.dumpGraf(graf, 2)
        self._println()

    def dumpSectionHeader(self, header):
        """Write a section heading line."""
        self._println(header)

    def dumpStartOfSections(self):
        """Hook called before the first section; writes nothing here."""
        pass

    def dumpEndOfSections(self):
        """Hook called after the last section; writes nothing here."""
        pass

    def dumpEndOfSection(self):
        """Write the blank line that ends a section."""
        self._println()

    def dumpEndOfChangelog(self):
        """Write the blank line that ends the changelog."""
        self._println()

    def dumpDrupalBreak(self):
        """Hook for the drupal break marker; writes nothing here."""
        pass

    def dumpItem(self, grafs):
        """Write one item: its first paragraph, then each further
        paragraph preceded by a blank line."""
        self.dumpGraf(grafs[0], 4, 6)
        for par in grafs[1:]:
            self._println()
            self.dumpGraf(par, 6, 6)

    def collateAndSortSections(self):
        """Merge sections that share a heading and sort them.

        Items of sections with the same clean_head() heading are gathered,
        in order of appearance, under one section.  The sections are then
        ordered by head_score(), with ties broken by the lower-cased
        heading.  self.sections is replaced by (0, heading, items) tuples.
        """
        heads = []
        sectionsByHead = {}
        for _, head, items in self.sections:
            head = clean_head(head)
            try:
                s = sectionsByHead[head]
            except KeyError:
                s = sectionsByHead[head] = []
                heads.append((head_score(head), head.lower(), head, s))

            s.extend(items)

        heads.sort()
        self.sections = [(0, head, items) for _1, _2, head, items in heads]

    def dump(self):
        """Write the whole changelog to sys.stdout through the dump* hooks.

        Section headings that do not end in ":" get one appended, with a
        notice on stderr.
        """
        if self.prehead:
            self.dumpPreheader(self.prehead)

        if not self.blogOrder:
            self.dumpMainhead(self.mainhead)

        for par in self.headtext:
            self.dumpHeadGraf(par)

        if self.blogOrder:
            self.dumpMainhead(self.mainhead)

        # Remember the item list of the second section so that the break
        # can be emitted right after that section is written.
        drupalBreakAfter = None
        if self.drupalBreak and len(self.sections) > 4:
            drupalBreakAfter = self.sections[1][2]

        self.dumpStartOfSections()
        for _, head, items in self.sections:
            if not head.endswith(':'):
                sys.stderr.write("adding : to %r" % head + "\n")
                head = head + ":"
            self.dumpSectionHeader(head)
            for _, grafs in items:
                self.dumpItem(grafs)
            self.dumpEndOfSection()
            # Identity, not equality: we want that exact section's list.
            if items is drupalBreakAfter:
                self.dumpDrupalBreak()
        self.dumpEndOfSections()
        self.dumpEndOfChangelog()
# Let's turn bugs to html.
# Matches "bug", "ticket" or "feature" (in any case), whitespace, and then
# 4 or 5 digits.  Written as a raw string so that \s and \d reach the regex
# engine as written instead of relying on Python passing unknown string
# escapes through.
BUG_PAT = re.compile(r'(bug|ticket|feature)\s+(\d{4,5})', re.I)


def bug_html(m):
    """Return the replacement text for one BUG_PAT match `m`.

    The matched word is kept as written and the number becomes a link to
    that number on bugs.torproject.org.
    """
    word, number = m.group(1), m.group(2)
    return "%s <a href='https://bugs.torproject.org/%s'>%s</a>" % (word, number, number)
class HTMLChangeLog(ChangeLog):
    """ChangeLog whose dump* hooks write an HTML fragment to sys.stdout.

    Sections become <li> entries of an outer <ul>, each holding a nested
    <ul> of items.  Paragraph text is HTML-escaped and references matched
    by BUG_PAT are turned into links.
    """

    def __init__(self, *args, **kwargs):
        super(HTMLChangeLog, self).__init__(*args, **kwargs)

    @staticmethod
    def _println(text=""):
        """Write `text` followed by a newline to the current sys.stdout."""
        sys.stdout.write("%s\n" % (text,))

    def htmlText(self, graf):
        """Write paragraph `graf` (a list of lines) as one line of HTML text.

        Each line is escaped (&, <, >) and stripped; the lines are joined
        with single spaces and BUG_PAT matches are replaced by bug_html().
        """
        escaped = []
        for raw in graf:
            text = raw.rstrip()
            # "&" must be handled first so the entities added for "<"
            # and ">" are not escaped again.
            for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
                text = text.replace(char, entity)
            escaped.append(text.strip())
        sys.stdout.write(BUG_PAT.sub(bug_html, " ".join(escaped)))

    def htmlPar(self, graf):
        """Write paragraph `graf` wrapped in <p>...</p> and a newline."""
        sys.stdout.write("<p>")
        self.htmlText(graf)
        sys.stdout.write("</p>\n")

    def dumpPreheader(self, graf):
        """Write the pre-heading lines as a paragraph."""
        self.htmlPar(graf)

    def dumpMainhead(self, head):
        """Write the main heading as an <h2> element (no trailing newline)."""
        sys.stdout.write("<h2>%s</h2>" % head)

    def dumpHeadGraf(self, graf):
        """Write one introductory paragraph."""
        self.htmlPar(graf)

    def dumpSectionHeader(self, header):
        """Open a section: an <li> with the heading text, then a nested <ul>.

        The first " o " bullet and leading whitespace are removed from
        `header`.
        """
        title = header.replace(" o ", "", 1).lstrip()
        sys.stdout.writelines([" <li>%s\n" % title, " <ul>\n"])

    def dumpEndOfSection(self):
        """Close the section's nested <ul>."""
        sys.stdout.write(" </ul>\n\n")

    def dumpEndOfChangelog(self):
        """Nothing is written at the end of the changelog."""
        pass

    def dumpStartOfSections(self):
        """Open the outer list of sections."""
        self._println("<ul>\n")

    def dumpEndOfSections(self):
        """Close the outer list of sections."""
        self._println("</ul>\n")

    def dumpDrupalBreak(self):
        """Close the outer list, write the <!--break--> marker, and open
        a new outer list."""
        for chunk in ("\n</ul>\n",
                      "<p>&nbsp;</p>",
                      "\n<!--break-->\n\n",
                      "<ul>"):
            self._println(chunk)

    def dumpItem(self, grafs):
        """Write one item as an <li>.

        The first " - " bullet and leading whitespace are removed from the
        item's first line; this updates `grafs` in place.  An item with
        several paragraphs is written as a series of <p> elements, one
        with a single paragraph as bare text.  A newline follows the item.
        """
        opening = grafs[0]
        opening[0] = opening[0].replace(" - ", "", 1).lstrip()
        sys.stdout.write(" <li>")
        if len(grafs) > 1:
            for par in grafs:
                self.htmlPar(par)
        else:
            self.htmlText(opening)
        self._println()
# Command-line interface.  Each entry is (option flags, add_option()
# keyword arguments); the options are registered in this order.
op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
for _flags, _settings in [
    (('-W', '--no-wrap'),
     dict(action='store_false', dest='wrapText', default=True,
          help='Do not re-wrap paragraphs')),
    (('-S', '--no-sort'),
     dict(action='store_false', dest='sort', default=True,
          help='Do not sort or collate sections')),
    (('-o', '--output'),
     dict(dest='output', default='-', metavar='FILE',
          help="write output to FILE")),
    (('-H', '--html'),
     dict(action='store_true', dest='html', default=False,
          help="generate an HTML fragment")),
    (('-1', '--first'),
     dict(action='store_true', dest='firstOnly', default=False,
          help="write only the first section")),
    (('-b', '--blog-header'),
     dict(action='store_true', dest='blogOrder', default=False,
          help="Write the header in blog order")),
    (('-B', '--blog'),
     dict(action='store_true', dest='blogFormat', default=False,
          help="Set all other options as appropriate for a blog post")),
    (('--inplace',),
     dict(action='store_true', dest='inplace', default=False,
          help="Alter the ChangeLog in place")),
    (('--drupal-break',),
     dict(action='store_true', dest='drupalBreak', default=False,
          help='Insert a drupal-friendly <!--break--> as needed')),
]:
    op.add_option(*_flags, **_settings)
del _flags, _settings
options, args = op.parse_args()

# --blog is shorthand for the set of options used for a blog post.
if options.blogFormat:
    options.blogOrder = True
    options.html = True
    options.sort = False
    options.wrapText = False
    options.firstOnly = True
    options.drupalBreak = True

if len(args) > 1:
    op.error("Too many arguments")
elif len(args) == 0:
    fname = 'ChangeLog'
else:
    fname = args[0]

if options.inplace:
    # Bad option combinations are usage errors; an `assert` would be
    # skipped under `python -O` and gives the user no explanation.
    if options.output != '-':
        op.error("--inplace cannot be used together with --output")
    # Writing only the first section back over the input would throw the
    # rest of the file away.
    if options.firstOnly:
        op.error("--inplace cannot be used together with --first or --blog")
    options.output = fname

# Read from the named file through sys.stdin; "-" means real standard input.
infile = None
if fname != '-':
    infile = sys.stdin = open(fname, 'r')

nextline = None

if options.html:
    ChangeLogClass = HTMLChangeLog
else:
    ChangeLogClass = ChangeLog

CL = ChangeLogClass(wrapText=options.wrapText,
                    blogOrder=options.blogOrder,
                    drupalBreak=options.drupalBreak)
parser = head_parser

# Feed lines to the changelog until the next release's "Changes in" line
# (TP_END), switching parsers once the first section heading is seen.
for line in sys.stdin:
    line = line.rstrip()
    tp = parser(line)

    if tp == TP_SECHEAD:
        parser = body_parser
    elif tp == TP_END:
        nextline = line
        break

    CL.addLine(tp, line)

CL.lint()

# Output to a file goes to "<name>.new" first and is renamed into place
# once it is complete.
if options.output != '-':
    fname_new = options.output + ".new"
    fname_out = options.output
    sys.stdout = open(fname_new, 'w')
else:
    fname_new = fname_out = None

if options.sort:
    CL.collateAndSortSections()

CL.dump()

# Unless only the first section was requested, copy the rest of the input
# through unchanged, starting with the line that ended parsing.
if not options.firstOnly:
    if nextline is not None:
        sys.stdout.write("%s\n" % nextline)

    for line in sys.stdin:
        sys.stdout.write(line)

if infile is not None:
    sys.stdin = sys.__stdin__
    infile.close()

# Finish the output file in every mode (including --first), and close it
# before the rename so that all buffered data is on disk.
if fname_new is not None:
    outfile = sys.stdout
    sys.stdout = sys.__stdout__
    outfile.close()
    os.rename(fname_new, fname_out)

# --first ends the run here with a success status.
if options.firstOnly:
    sys.exit(0)