scripts/maint/format_changelog.py

   1 #!/usr/bin/python
   2 # Copyright (c) 2014-2015, The Tor Project, Inc.
   3 # See LICENSE for licensing information
   4 #
   5 # This script reformats a section of the changelog to wrap everything to
   6 # the right width and put blank lines in the right places.  Eventually,
   7 # it might include a linter.
   8 #
# To run it, give it the name of a changelog file (default: "ChangeLog"),
# or pass "-" as the filename and pipe a section of the changelog (starting
# with "Changes in Tor 0.x.y.z-alpha") through the script.
  11
  12 import os
  13 import re
  14 import sys
  15 import optparse
  16
  17 # ==============================
  18 # Oh, look!  It's a cruddy approximation to Knuth's elegant text wrapping
  19 # algorithm, with totally ad hoc parameters!
  20 #
# We're trying to minimize:
#    UNDERFLOW_PENALTY * the cube (UNDERFLOW_EXPONENT) of ragged space on
#    underflowed intermediate lines,
#  PLUS
#    OVERFLOW_PENALTY * the fourth power (OVERFLOW_EXPONENT) of overflowed
#    characters
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY * the ragged space on the last line (raised
#    to LASTLINE_UNDERFLOW_EXPONENT)
#  PLUS
#    ORPHAN_PENALTY if a non-overflowing last line contains no space
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
  29 #
  30 # We use an obvious dynamic programming algorithm to sorta approximate this.
  31 # It's not coded right or optimally, but it's fast enough for changelogs
  32 #
  33 # (Code found in an old directory of mine, lightly cleaned. -NM)
  34
  35 NO_HYPHENATE=set("""
  36 pf-divert
  37 tor-resolve
  38 tor-gencert
  39 """.split())
  40
  41 LASTLINE_UNDERFLOW_EXPONENT = 1
  42 LASTLINE_UNDERFLOW_PENALTY = 1
  43
  44 UNDERFLOW_EXPONENT = 3
  45 UNDERFLOW_PENALTY = 1
  46
  47 OVERFLOW_EXPONENT = 4
  48 OVERFLOW_PENALTY = 2000
  49
  50 ORPHAN_PENALTY = 10000
  51
  52 OPENPAREN_PENALTY = 200
  53
  54 def generate_wrapping(words, divisions):
  55     lines = []
  56     last = 0
  57     for i in divisions:
  58         w = words[last:i]
  59         last = i
  60         line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  61         lines.append(line.strip())
  62     return lines
  63
  64 def wrapping_quality(words, divisions, width1, width2):
  65     total = 0.0
  66
  67     lines = generate_wrapping(words, divisions)
  68     for line in lines:
  69         length = len(line)
  70         if line is lines[0]:
  71             width = width1
  72         else:
  73             width = width2
  74
  75         if line[0:1] == '(':
  76             total += OPENPAREN_PENALTY
  77
  78         if length > width:
  79             total += OVERFLOW_PENALTY * (
  80                 (length - width) ** OVERFLOW_EXPONENT )
  81         else:
  82             if line is lines[-1]:
  83                 e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  84                 if " " not in line:
  85                     total += ORPHAN_PENALTY
  86             else:
  87                 e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  88
  89             total += p * ((width - length) ** e)
  90
  91     return total
  92
  93 def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  94     wrapping_after = [ (0,), ]
  95
  96     w1 = width - prefix_len1
  97     w2 = width - prefix_len2
  98
  99     for i in range(1, len(words)+1):
 100         best_so_far = None
 101         best_score = 1e300
 102         for j in range(i):
 103             t = wrapping_after[j]
 104             t1 = t[:-1] + (i,)
 105             t2 = t + (i,)
 106             wq1 = wrapping_quality(words, t1, w1, w2)
 107             wq2 = wrapping_quality(words, t2, w1, w2)
 108
 109             if wq1 < best_score:
 110                 best_so_far = t1
 111                 best_score = wq1
 112             if wq2 < best_score:
 113                 best_so_far = t2
 114                 best_score = wq2
 115         wrapping_after.append( best_so_far )
 116
 117     lines = generate_wrapping(words, wrapping_after[-1])
 118
 119     return lines
 120
 121 def hyphenatable(word):
 122     if "--" in word:
 123         return False
 124
 125     if re.match(r'^[^\d\-]\D*-', word):
 126         stripped = re.sub(r'^\W+','',word)
 127         stripped = re.sub(r'\W+$','',word)
 128         return stripped not in NO_HYPHENATE
 129     else:
 130         return False
 131
 132 def split_paragraph(s):
 133     "Split paragraph into words; tuned for Tor."
 134
 135     r = []
 136     for word in s.split():
 137         if hyphenatable(word):
 138             while "-" in word:
 139                 a,word = word.split("-",1)
 140                 r.append(a+"\xff")
 141         r.append(word)
 142     return r
 143
 144 def fill(text, width, initial_indent, subsequent_indent):
 145     words = split_paragraph(text)
 146     lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
 147                       width)
 148     res = [ initial_indent, lines[0], "\n" ]
 149     for line in lines[1:]:
 150         res.append(subsequent_indent)
 151         res.append(line)
 152         res.append("\n")
 153     return "".join(res)
 154
 155 # ==============================
 156
 157
 158 TP_MAINHEAD = 0
 159 TP_HEADTEXT = 1
 160 TP_BLANK = 2
 161 TP_SECHEAD = 3
 162 TP_ITEMFIRST = 4
 163 TP_ITEMBODY = 5
 164 TP_END = 6
 165 TP_PREHEAD = 7
 166
 167 def head_parser(line):
 168     if re.match(r'^Changes in', line):
 169         return TP_MAINHEAD
 170     elif re.match(r'^[A-Za-z]', line):
 171         return TP_PREHEAD
 172     elif re.match(r'^  o ', line):
 173         return TP_SECHEAD
 174     elif re.match(r'^\s*$', line):
 175         return TP_BLANK
 176     else:
 177         return TP_HEADTEXT
 178
 179 def body_parser(line):
 180     if re.match(r'^  o ', line):
 181         return TP_SECHEAD
 182     elif re.match(r'^    -',line):
 183         return TP_ITEMFIRST
 184     elif re.match(r'^      \S', line):
 185         return TP_ITEMBODY
 186     elif re.match(r'^\s*$', line):
 187         return TP_BLANK
 188     elif re.match(r'^Changes in', line):
 189         return TP_END
 190     elif re.match(r'^\s+\S', line):
 191         return TP_HEADTEXT
 192     else:
 193         print "Weird line %r"%line
 194
 195 def clean_head(head):
 196     return head
 197
 198 def head_score(s):
 199     m = re.match(r'^ +o (.*)', s)
 200     if not m:
 201         print >>sys.stderr, "Can't score %r"%s
 202         return 99999
 203     lw = m.group(1).lower()
 204     if lw.startswith("security") and "feature" not in lw:
 205         score = -300
 206     elif lw.startswith("deprecated version"):
 207         score = -200
 208     elif (('new' in lw and 'requirement' in lw) or
 209           ('new' in lw and 'dependenc' in lw) or
 210           ('build' in lw and 'requirement' in lw) or
 211           ('removed' in lw and 'platform' in lw)):
 212         score = -100
 213     elif lw.startswith("major feature"):
 214         score = 00
 215     elif lw.startswith("major bug"):
 216         score = 50
 217     elif lw.startswith("major"):
 218         score = 70
 219     elif lw.startswith("minor feature"):
 220         score = 200
 221     elif lw.startswith("minor bug"):
 222         score = 250
 223     elif lw.startswith("minor"):
 224         score = 270
 225     else:
 226         score = 1000
 227
 228     if 'secur' in lw:
 229         score -= 2
 230
 231     if "(other)" in lw:
 232         score += 2
 233
 234     if '(' not in lw:
 235         score -= 1
 236
 237     return score
 238
 239 class ChangeLog(object):
 240     def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
 241         self.prehead = []
 242         self.mainhead = None
 243         self.headtext = []
 244         self.curgraf = None
 245         self.sections = []
 246         self.cursection = None
 247         self.lineno = 0
 248         self.wrapText = wrapText
 249         self.blogOrder = blogOrder
 250         self.drupalBreak = drupalBreak
 251
 252     def addLine(self, tp, line):
 253         self.lineno += 1
 254
 255         if tp == TP_MAINHEAD:
 256             assert not self.mainhead
 257             self.mainhead = line
 258
 259         elif tp == TP_PREHEAD:
 260             self.prehead.append(line)
 261
 262         elif tp == TP_HEADTEXT:
 263             if self.curgraf is None:
 264                 self.curgraf = []
 265                 self.headtext.append(self.curgraf)
 266             self.curgraf.append(line)
 267
 268         elif tp == TP_BLANK:
 269             self.curgraf = None
 270
 271         elif tp == TP_SECHEAD:
 272             self.cursection = [ self.lineno, line, [] ]
 273             self.sections.append(self.cursection)
 274
 275         elif tp == TP_ITEMFIRST:
 276             item = ( self.lineno, [ [line] ])
 277             self.curgraf = item[1][0]
 278             self.cursection[2].append(item)
 279
 280         elif tp == TP_ITEMBODY:
 281             if self.curgraf is None:
 282                 self.curgraf = []
 283                 self.cursection[2][-1][1].append(self.curgraf)
 284             self.curgraf.append(line)
 285
 286         else:
 287             assert "This" is "unreachable"
 288
 289     def lint_head(self, line, head):
 290         m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
 291         if not m:
 292             print >>sys.stderr, "Weird header format on line %s"%line
 293
 294     def lint_item(self, line, grafs, head_type):
 295         pass
 296
 297     def lint(self):
 298         self.head_lines = {}
 299         for sec_line, sec_head, items in self.sections:
 300             head_type = self.lint_head(sec_line, sec_head)
 301             for item_line, grafs in items:
 302                 self.lint_item(item_line, grafs, head_type)
 303
 304     def dumpGraf(self,par,indent1,indent2=-1):
 305         if not self.wrapText:
 306             for line in par:
 307                 print line
 308             return
 309
 310         if indent2 == -1:
 311             indent2 = indent1
 312         text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
 313
 314         sys.stdout.write(fill(text,
 315                               width=72,
 316                               initial_indent=" "*indent1,
 317                               subsequent_indent=" "*indent2))
 318
 319     def dumpPreheader(self, graf):
 320         self.dumpGraf(graf, 0)
 321         print
 322
 323     def dumpMainhead(self, head):
 324         print head
 325
 326     def dumpHeadGraf(self, graf):
 327         self.dumpGraf(graf, 2)
 328         print
 329
 330     def dumpSectionHeader(self, header):
 331         print header
 332
 333     def dumpStartOfSections(self):
 334         pass
 335
 336     def dumpEndOfSections(self):
 337         pass
 338
 339     def dumpEndOfSection(self):
 340         print
 341
 342     def dumpEndOfChangelog(self):
 343         print
 344
 345     def dumpDrupalBreak(self):
 346         pass
 347
 348     def dumpItem(self, grafs):
 349         self.dumpGraf(grafs[0],4,6)
 350         for par in grafs[1:]:
 351             print
 352             self.dumpGraf(par,6,6)
 353
 354     def collateAndSortSections(self):
 355         heads = []
 356         sectionsByHead = { }
 357         for _, head, items in self.sections:
 358             head = clean_head(head)
 359             try:
 360                 s = sectionsByHead[head]
 361             except KeyError:
 362                 s = sectionsByHead[head] = []
 363                 heads.append( (head_score(head), head.lower(), head, s) )
 364
 365             s.extend(items)
 366
 367         heads.sort()
 368         self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
 369
 370     def dump(self):
 371         if self.prehead:
 372             self.dumpPreheader(self.prehead)
 373
 374         if not self.blogOrder:
 375             self.dumpMainhead(self.mainhead)
 376
 377         for par in self.headtext:
 378             self.dumpHeadGraf(par)
 379
 380         if self.blogOrder:
 381             self.dumpMainhead(self.mainhead)
 382
 383         drupalBreakAfter = None
 384         if self.drupalBreak and len(self.sections) > 4:
 385             drupalBreakAfter = self.sections[1][2]
 386
 387         self.dumpStartOfSections()
 388         for _,head,items in self.sections:
 389             if not head.endswith(':'):
 390                 print >>sys.stderr, "adding : to %r"%head
 391                 head = head + ":"
 392             self.dumpSectionHeader(head)
 393             for _,grafs in items:
 394                 self.dumpItem(grafs)
 395             self.dumpEndOfSection()
 396             if items is drupalBreakAfter:
 397                 self.dumpDrupalBreak()
 398         self.dumpEndOfSections()
 399         self.dumpEndOfChangelog()
 400
 401 # Let's turn bugs to html.
 402 BUG_PAT = re.compile('(bug|ticket|feature)\s+(\d{4,5})', re.I)
 403 def bug_html(m):
 404     return "%s <a href='https://bugs.torproject.org/%s'>%s</a>" % (m.group(1), m.group(2), m.group(2))
 405
 406 class HTMLChangeLog(ChangeLog):
 407     def __init__(self, *args, **kwargs):
 408         ChangeLog.__init__(self, *args, **kwargs)
 409
 410     def htmlText(self, graf):
 411         output = []
 412         for line in graf:
 413             line = line.rstrip().replace("&","&amp;")
 414             line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
 415             output.append(line.strip())
 416         output = " ".join(output)
 417         output = BUG_PAT.sub(bug_html, output)
 418         sys.stdout.write(output)
 419
 420     def htmlPar(self, graf):
 421         sys.stdout.write("<p>")
 422         self.htmlText(graf)
 423         sys.stdout.write("</p>\n")
 424
 425     def dumpPreheader(self, graf):
 426         self.htmlPar(graf)
 427
 428     def dumpMainhead(self, head):
 429         sys.stdout.write("<h2>%s</h2>"%head)
 430
 431     def dumpHeadGraf(self, graf):
 432         self.htmlPar(graf)
 433
 434     def dumpSectionHeader(self, header):
 435         header = header.replace(" o ", "", 1).lstrip()
 436         sys.stdout.write("  <li>%s\n"%header)
 437         sys.stdout.write("  <ul>\n")
 438
 439     def dumpEndOfSection(self):
 440         sys.stdout.write("  </ul>\n\n")
 441
 442     def dumpEndOfChangelog(self):
 443         pass
 444
 445     def dumpStartOfSections(self):
 446         print "<ul>\n"
 447
 448     def dumpEndOfSections(self):
 449         print "</ul>\n"
 450
 451     def dumpDrupalBreak(self):
 452         print "\n</ul>\n"
 453         print "<p>&nbsp;</p>"
 454         print "\n<!--break-->\n\n"
 455         print "<ul>"
 456
 457     def dumpItem(self, grafs):
 458         grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
 459         sys.stdout.write("  <li>")
 460         if len(grafs) > 1:
 461             for par in grafs:
 462                 self.htmlPar(par)
 463         else:
 464             self.htmlText(grafs[0])
 465         print
 466
 467 op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
 468 op.add_option('-W', '--no-wrap', action='store_false',
 469               dest='wrapText', default=True,
 470               help='Do not re-wrap paragraphs')
 471 op.add_option('-S', '--no-sort', action='store_false',
 472               dest='sort', default=True,
 473               help='Do not sort or collate sections')
 474 op.add_option('-o', '--output', dest='output',
 475               default='-', metavar='FILE', help="write output to FILE")
 476 op.add_option('-H', '--html', action='store_true',
 477               dest='html', default=False,
 478               help="generate an HTML fragment")
 479 op.add_option('-1', '--first', action='store_true',
 480               dest='firstOnly', default=False,
 481               help="write only the first section")
 482 op.add_option('-b', '--blog-header', action='store_true',
 483               dest='blogOrder', default=False,
 484               help="Write the header in blog order")
 485 op.add_option('-B', '--blog', action='store_true',
 486               dest='blogFormat', default=False,
 487               help="Set all other options as appropriate for a blog post")
 488 op.add_option('--inplace', action='store_true',
 489               dest='inplace', default=False,
 490               help="Alter the ChangeLog in place")
 491 op.add_option('--drupal-break', action='store_true',
 492               dest='drupalBreak', default=False,
 493               help='Insert a drupal-friendly <!--break--> as needed')
 494
 495 options,args = op.parse_args()
 496
 497 if options.blogFormat:
 498     options.blogOrder = True
 499     options.html = True
 500     options.sort = False
 501     options.wrapText = False
 502     options.firstOnly = True
 503     options.drupalBreak = True
 504
 505 if len(args) > 1:
 506     op.error("Too many arguments")
 507 elif len(args) == 0:
 508     fname = 'ChangeLog'
 509 else:
 510     fname = args[0]
 511
 512 if options.inplace:
 513     assert options.output == '-'
 514     options.output = fname
 515
 516 if fname != '-':
 517     sys.stdin = open(fname, 'r')
 518
 519 nextline = None
 520
 521 if options.html:
 522     ChangeLogClass = HTMLChangeLog
 523 else:
 524     ChangeLogClass = ChangeLog
 525
 526 CL = ChangeLogClass(wrapText=options.wrapText,
 527                     blogOrder=options.blogOrder,
 528                     drupalBreak=options.drupalBreak)
 529 parser = head_parser
 530
 531 for line in sys.stdin:
 532     line = line.rstrip()
 533     tp = parser(line)
 534
 535     if tp == TP_SECHEAD:
 536         parser = body_parser
 537     elif tp == TP_END:
 538         nextline = line
 539         break
 540
 541     CL.addLine(tp,line)
 542
 543 CL.lint()
 544
 545 if options.output != '-':
 546     fname_new = options.output+".new"
 547     fname_out = options.output
 548     sys.stdout = open(fname_new, 'w')
 549 else:
 550     fname_new = fname_out = None
 551
 552 if options.sort:
 553     CL.collateAndSortSections()
 554
 555 CL.dump()
 556
 557 if options.firstOnly:
 558     sys.exit(0)
 559
 560 if nextline is not None:
 561     print nextline
 562
 563 for line in sys.stdin:
 564     sys.stdout.write(line)
 565
 566 if fname_new is not None:
 567     os.rename(fname_new, fname_out)