2 # Copyright (c) 2014-2015, The Tor Project, Inc.
3 # See LICENSE for licensing information
5 # This script reformats a section of the changelog to wrap everything to
6 # the right width and put blank lines in the right places. Eventually,
7 # it might include a linter.
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
17 # ==============================
18 # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
19 # algorithm, with totally ad hoc parameters!
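# We're trying to minimize the sum of:
#    The total of the cubes of ragged space on underflowed intermediate lines,
#    100 * the fourth power of overflowed characters,
#    .1 * a bit more than the cube of ragged space on the last line,
#    OPENPAREN_PENALTY for each line that starts with (
# (These weights are approximate: the values actually used are the
# *_PENALTY and *_EXPONENT constants defined below, e.g. OVERFLOW_PENALTY
# is 2000 and LASTLINE_UNDERFLOW_EXPONENT is 1.)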
30 # We use an obvious dynamic programming algorithm to sorta approximate this.
31 # It's not coded right or optimally, but it's fast enough for changelogs
33 # (Code found in an old directory of mine, lightly cleaned. -NM)
# Weights for the line-wrapping cost function.
#
# The scoring code applies the underflow pairs as
# penalty * (width - length) ** exponent, applies the overflow pair as
# OVERFLOW_PENALTY * (length - width) ** OVERFLOW_EXPONENT, and adds
# ORPHAN_PENALTY and OPENPAREN_PENALTY as flat amounts.
#
# NOTE(review): UNDERFLOW_PENALTY and OVERFLOW_EXPONENT are referenced by
# the scoring code but are not assigned anywhere in this excerpt --
# confirm they are defined elsewhere before relying on this block.

# Underflow weighting for the "last line" case.
LASTLINE_UNDERFLOW_EXPONENT = 1
LASTLINE_UNDERFLOW_PENALTY = 1

# Exponent for underflow in the other case.
UNDERFLOW_EXPONENT = 3

# Multiplier for lines that are longer than the target width.
OVERFLOW_PENALTY = 2000

# Flat cost; the condition that triggers it is not visible in this excerpt.
ORPHAN_PENALTY = 10000

# Flat cost for each line that starts with "(".
OPENPAREN_PENALTY = 200
# generate_wrapping(words, divisions): only part of this function is visible.
# The visible statements join a group of words `w` with single spaces, turn
# every "\xff" marker (with or without a following space) into "-", and
# append the stripped result to `lines`.
# NOTE(review): the definitions of `w` and `lines`, the loop over
# `divisions`, and the return statement are not shown in this excerpt.
54 def generate_wrapping(words
, divisions
):
60 line
= " ".join(w
).replace("\xff ","-").replace("\xff","-")
61 lines
.append(line
.strip())
# wrapping_quality(words, divisions, width1, width2): computes a cost for one
# candidate wrapping.  The wrapped lines come from generate_wrapping(), and
# `total` accumulates the terms below.
# NOTE(review): the conditions selecting each term, the definitions of
# `length` and `width`, and the return statement are not visible here;
# presumably width1 applies to the first line and width2 to the rest --
# confirm against the full source.
64 def wrapping_quality(words
, divisions
, width1
, width2
):
67 lines
= generate_wrapping(words
, divisions
)
# Flat penalty (the header comment says: for a line that starts with "(").
76 total
+= OPENPAREN_PENALTY
# Overflow term: grows with the number of characters past `width`.
79 total
+= OVERFLOW_PENALTY
* (
80 (length
- width
) ** OVERFLOW_EXPONENT
)
# Select the (exponent, penalty) pair for the last-line case.
83 e
,p
= (LASTLINE_UNDERFLOW_EXPONENT
, LASTLINE_UNDERFLOW_PENALTY
)
85 total
+= ORPHAN_PENALTY
# Select the (exponent, penalty) pair for the ordinary underflow case.
87 e
,p
= (UNDERFLOW_EXPONENT
, UNDERFLOW_PENALTY
)
# Underflow term: grows with the unused space (width - length).
89 total
+= p
* ((width
- length
) ** e
)
# wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72): chooses line
# breaks for `words`.  `wrapping_after` starts as [(0,)] and gets a
# `best_so_far` entry appended inside a loop over i = 1..len(words); the
# final lines are generated from the last entry, wrapping_after[-1].
# Candidates t1 and t2 are scored with wrapping_quality() against the usable
# widths w1 and w2 (the total width minus each prefix length).
# NOTE(review): the inner loop over `j`, the construction of t1/t2, the
# comparison that picks best_so_far, and the return are not visible here.
93 def wrap_graf(words
, prefix_len1
=0, prefix_len2
=0, width
=72):
94 wrapping_after
= [ (0,), ]
# Usable widths once the two prefix lengths are subtracted.
96 w1
= width
- prefix_len1
97 w2
= width
- prefix_len2
99 for i
in range(1, len(words
)+1):
103 t
= wrapping_after
[j
]
106 wq1
= wrapping_quality(words
, t1
, w1
, w2
)
107 wq2
= wrapping_quality(words
, t2
, w1
, w2
)
115 wrapping_after
.append( best_so_far
)
117 lines
= generate_wrapping(words
, wrapping_after
[-1])
# hyphenatable(word): decides whether `word` may be split at a hyphen.
# Visible logic: the word must start with a character that is neither a
# digit nor "-", followed by non-digits up to a "-"; the result is then
# whether `stripped` is absent from NO_HYPHENATE.
# NOTE(review): lines between the def and the `if`, and any else-branch,
# are not visible in this excerpt; NO_HYPHENATE is defined elsewhere.
121 def hyphenatable(word
):
125 if re
.match(r
'^[^\d\-]\D*-', word
):
126 stripped
= re
.sub(r
'^\W+','',word
)
# NOTE(review): this second substitution is applied to `word`, not to
# `stripped`, so the leading-\W strip on the previous statement is
# discarded -- this looks like a bug (probably meant re.sub(..., stripped)).
127 stripped
= re
.sub(r
'\W+$','',word
)
128 return stripped
not in NO_HYPHENATE
# split_paragraph(s): iterates over the whitespace-separated words of `s`;
# for a word that hyphenatable() accepts, it splits off the part before the
# first "-" into `a` and keeps the remainder in `word`.
# NOTE(review): what is done with `a` and `word` afterwards (and the return
# value) is not visible in this excerpt.
132 def split_paragraph(s
):
133 "Split paragraph into words; tuned for Tor."
136 for word
in s
.split():
137 if hyphenatable(word
):
139 a
,word
= word
.split("-",1)
# fill(text, width, initial_indent, subsequent_indent): splits `text` into
# words with split_paragraph(), wraps them with wrap_graf() (passing the
# lengths of the two indents as prefix lengths), and builds a list `res`
# that starts with initial_indent + first line + newline and appends
# subsequent_indent for each following line.
# NOTE(review): lines[0] is indexed unconditionally, so this presumably
# relies on wrap_graf() returning at least one line -- confirm.  The rest of
# the wrap_graf() call, the remaining appends, and the return statement are
# not visible here.
144 def fill(text
, width
, initial_indent
, subsequent_indent
):
145 words
= split_paragraph(text
)
146 lines
= wrap_graf(words
, len(initial_indent
), len(subsequent_indent
),
148 res
= [ initial_indent
, lines
[0], "\n" ]
149 for line
in lines
[1:]:
150 res
.append(subsequent_indent
)
155 # ==============================
# head_parser(line): classifies a line from the head of the changelog by
# testing, in order: a "Changes in" prefix, a leading ASCII letter, a
# " o " prefix, and a blank/whitespace-only line.
# NOTE(review): the value produced by each branch is not visible in this
# excerpt (presumably one of the TP_* constants used by ChangeLog.addLine).
167 def head_parser(line
):
168 if re
.match(r
'^Changes in', line
):
170 elif re
.match(r
'^[A-Za-z]', line
):
172 elif re
.match(r
'^ o ', line
):
174 elif re
.match(r
'^\s*$', line
):
# body_parser(line): classifies a line from the body of the changelog by
# testing, in order: a " o " prefix, a " -" prefix, a space followed by a
# non-space, a blank line, a "Changes in" prefix, and any indented text.
# A "Weird line" message is printed to stdout by the visible print statement.
# NOTE(review): the value produced by each branch, and the condition guarding
# the print, are not visible in this excerpt.
179 def body_parser(line
):
180 if re
.match(r
'^ o ', line
):
182 elif re
.match(r
'^ -',line
):
184 elif re
.match(r
'^ \S', line
):
186 elif re
.match(r
'^\s*$', line
):
188 elif re
.match(r
'^Changes in', line
):
190 elif re
.match(r
'^\s+\S', line
):
193 print "Weird line %r"%line
# clean_head(head): only the def line is visible in this excerpt; its body
# is not shown.
195 def clean_head(head
):
# NOTE(review): the statements below use `s`, not `head`, so they presumably
# belong to a different function whose def line is missing -- most likely
# head_score(s), which collateAndSortSections() calls.  Confirm.
# They match a section header of the form " o <title>", report to stderr
# when something cannot be scored, and then test the lower-cased title
# against a fixed sequence of categories (security, deprecated versions,
# new requirements/dependencies/removed platforms, major*, minor*).
# The value each branch yields is not visible.
199 m
= re
.match(r
'^ +o (.*)', s
)
201 print >>sys
.stderr
, "Can't score %r"%s
203 lw
= m
.group(1).lower()
204 if lw
.startswith("security") and "feature" not in lw
:
206 elif lw
.startswith("deprecated version"):
208 elif (('new' in lw
and 'requirement' in lw
) or
209 ('new' in lw
and 'dependenc' in lw
) or
210 ('build' in lw
and 'requirement' in lw
) or
211 ('removed' in lw
and 'platform' in lw
)):
213 elif lw
.startswith("major feature"):
215 elif lw
.startswith("major bug"):
217 elif lw
.startswith("major"):
219 elif lw
.startswith("minor feature"):
221 elif lw
.startswith("minor bug"):
223 elif lw
.startswith("minor"):
# ChangeLog: accumulates classified changelog lines (addLine) and writes them
# back out through a family of dump* hooks that subclasses can override.
# NOTE(review): many interior lines of this class are missing from this
# excerpt; the comments below describe only what the visible statements do.
239 class ChangeLog(object):
# Constructor: stores the three output options and resets cursection.
# Other attributes used below (prehead, mainhead, headtext, curgraf,
# sections, lineno) are presumably initialised on lines not shown.
240 def __init__(self
, wrapText
=True, blogOrder
=True, drupalBreak
=False):
246 self
.cursection
= None
248 self
.wrapText
= wrapText
249 self
.blogOrder
= blogOrder
250 self
.drupalBreak
= drupalBreak
# addLine(tp, line): files `line` according to its type tag `tp`.
252 def addLine(self
, tp
, line
):
# Main heading: the assert requires that no main heading is already set.
255 if tp
== TP_MAINHEAD
:
256 assert not self
.mainhead
# Text before the main heading.
259 elif tp
== TP_PREHEAD
:
260 self
.prehead
.append(line
)
# Head text: the line is appended to the current paragraph, which is
# registered in self.headtext when a new paragraph is started.
262 elif tp
== TP_HEADTEXT
:
263 if self
.curgraf
is None:
265 self
.headtext
.append(self
.curgraf
)
266 self
.curgraf
.append(line
)
# Section heading: a section is the list [lineno, heading, items].
271 elif tp
== TP_SECHEAD
:
272 self
.cursection
= [ self
.lineno
, line
, [] ]
273 self
.sections
.append(self
.cursection
)
# First line of an item: an item is (lineno, [paragraph, ...]) and each
# paragraph is a list of lines.
275 elif tp
== TP_ITEMFIRST
:
276 item
= ( self
.lineno
, [ [line
] ])
277 self
.curgraf
= item
[1][0]
278 self
.cursection
[2].append(item
)
# Continuation of an item: appended to the current paragraph, which is added
# to the most recent item of the current section when newly started.
280 elif tp
== TP_ITEMBODY
:
281 if self
.curgraf
is None:
283 self
.cursection
[2][-1][1].append(self
.curgraf
)
284 self
.curgraf
.append(line
)
# NOTE(review): `is` compares the identity of two different string literals;
# the intent is evidently an assertion that always fails, but it depends on
# interning behaviour (and is removed under -O).  `raise AssertionError(...)`
# would be the robust spelling.
287 assert "This" is "unreachable"
# lint_head(line, head): checks a section heading against the pattern
# " o <title>[(<qualifier>)]:" and reports a mismatch on stderr.
289 def lint_head(self
, line
, head
):
290 m
= re
.match(r
'^ *o ([^\(]+)((?:\([^\)]+\))?):', head
)
292 print >>sys
.stderr
, "Weird header format on line %s"%line
# lint_item(line, grafs, head_type): body not visible in this excerpt.
294 def lint_item(self
, line
, grafs
, head_type
):
# NOTE(review): the loop below presumably belongs to a method whose def line
# is missing.  It lints every section heading, then every item in it.
299 for sec_line
, sec_head
, items
in self
.sections
:
300 head_type
= self
.lint_head(sec_line
, sec_head
)
301 for item_line
, grafs
in items
:
302 self
.lint_item(item_line
, grafs
, head_type
)
# dumpGraf(par, indent1, indent2=-1): writes one paragraph to stdout.  When
# wrapping is enabled, the lines are whitespace-normalised, joined with
# spaces, and passed to fill() with indents of indent1/indent2 spaces.
# NOTE(review): the handling of the -1 default and of wrapText == False is
# not visible.
304 def dumpGraf(self
,par
,indent1
,indent2
=-1):
305 if not self
.wrapText
:
312 text
= " ".join(re
.sub(r
'\s+', ' ', line
.strip()) for line
in par
)
314 sys
.stdout
.write(fill(text
,
316 initial_indent
=" "*indent1
,
317 subsequent_indent
=" "*indent2
))
# Output hooks.  Bodies that are not shown here are on lines missing from
# this excerpt.
# Pre-header text is dumped with indent 0.
319 def dumpPreheader(self
, graf
):
320 self
.dumpGraf(graf
, 0)
323 def dumpMainhead(self
, head
):
# Head paragraphs are dumped with indent 2.
326 def dumpHeadGraf(self
, graf
):
327 self
.dumpGraf(graf
, 2)
330 def dumpSectionHeader(self
, header
):
333 def dumpStartOfSections(self
):
336 def dumpEndOfSections(self
):
339 def dumpEndOfSection(self
):
342 def dumpEndOfChangelog(self
):
345 def dumpDrupalBreak(self
):
# dumpItem(grafs): first paragraph uses indents 4/6, later paragraphs 6/6.
348 def dumpItem(self
, grafs
):
349 self
.dumpGraf(grafs
[0],4,6)
350 for par
in grafs
[1:]:
352 self
.dumpGraf(par
,6,6)
# collateAndSortSections(): groups sections by their cleaned heading via
# `sectionsByHead`, records (head_score(head), head.lower(), head, items)
# tuples in `heads`, and rebuilds self.sections from `heads` with line
# number 0.
# NOTE(review): the initialisation of sectionsByHead/heads, the merging of
# items into `s`, and the sort call are not visible here.
354 def collateAndSortSections(self
):
357 for _
, head
, items
in self
.sections
:
358 head
= clean_head(head
)
360 s
= sectionsByHead
[head
]
362 s
= sectionsByHead
[head
] = []
363 heads
.append( (head_score(head
), head
.lower(), head
, s
) )
368 self
.sections
= [ (0, head
, items
) for _1
,_2
,head
,items
in heads
]
# NOTE(review): the statements below presumably belong to the main output
# method, whose def line is missing.  They emit the pre-header, the main
# heading and head paragraphs (the main heading's position depends on
# self.blogOrder), then every section.
372 self
.dumpPreheader(self
.prehead
)
374 if not self
.blogOrder
:
375 self
.dumpMainhead(self
.mainhead
)
377 for par
in self
.headtext
:
378 self
.dumpHeadGraf(par
)
381 self
.dumpMainhead(self
.mainhead
)
# With drupalBreak set and more than four sections, remember the item list
# of the second section; the break is emitted after that section (identity
# comparison below).
383 drupalBreakAfter
= None
384 if self
.drupalBreak
and len(self
.sections
) > 4:
385 drupalBreakAfter
= self
.sections
[1][2]
387 self
.dumpStartOfSections()
388 for _
,head
,items
in self
.sections
:
# Headings without a trailing ":" are reported on stderr.
389 if not head
.endswith(':'):
390 print >>sys
.stderr
, "adding : to %r"%head
392 self
.dumpSectionHeader(head
)
393 for _
,grafs
in items
:
395 self
.dumpEndOfSection()
396 if items
is drupalBreakAfter
:
397 self
.dumpDrupalBreak()
398 self
.dumpEndOfSections()
399 self
.dumpEndOfChangelog()
# Let's turn bugs to html.
# Matches the word "bug", "ticket" or "feature" (case-insensitively),
# whitespace, and a 4- or 5-digit number.  Group 1 is the word and group 2
# is the number.  The pattern is a raw string so that \s and \d reach the
# regex engine verbatim instead of being treated as (invalid) string escapes.
BUG_PAT = re.compile(r'(bug|ticket|feature)\s+(\d{4,5})', re.I)
# NOTE(review): this return statement presumably is the body of bug_html(m),
# the substitution callback passed to BUG_PAT.sub() in HTMLChangeLog; its
# def line is missing from this excerpt.  It rebuilds the matched text as
# "<word> <a href='https://bugs.torproject.org/<number>'><number></a>".
404 return "%s <a href='https://bugs.torproject.org/%s'>%s</a>" % (m
.group(1), m
.group(2), m
.group(2))
# HTMLChangeLog: ChangeLog variant whose dump* hooks write an HTML fragment
# to stdout instead of wrapped plain text.
# NOTE(review): several method bodies are missing from this excerpt; the
# comments below describe only the visible statements.
406 class HTMLChangeLog(ChangeLog
):
# Constructor: forwards all arguments to ChangeLog.__init__.
407 def __init__(self
, *args
, **kwargs
):
408 ChangeLog
.__init
__(self
, *args
, **kwargs
)
# htmlText(graf): collects the stripped lines of a paragraph, joins them
# with spaces, rewrites bug/ticket references via BUG_PAT.sub(bug_html, ...)
# and writes the result to stdout.
410 def htmlText(self
, graf
):
# NOTE(review): as written, these replace() calls substitute each character
# with itself and therefore do nothing.  Presumably the replacement strings
# were the HTML entities for "&", "<" and ">" and were decoded when this
# text was extracted -- confirm against the real source before relying on
# this for escaping.
413 line
= line
.rstrip().replace("&","&")
414 line
= line
.rstrip().replace("<","<").replace(">",">")
415 output
.append(line
.strip())
416 output
= " ".join(output
)
417 output
= BUG_PAT
.sub(bug_html
, output
)
418 sys
.stdout
.write(output
)
# htmlPar(graf): wraps output in <p>...</p>; the call between the two writes
# is not visible.
420 def htmlPar(self
, graf
):
421 sys
.stdout
.write("<p>")
423 sys
.stdout
.write("</p>\n")
425 def dumpPreheader(self
, graf
):
# Main heading is written as an <h2> element.
428 def dumpMainhead(self
, head
):
429 sys
.stdout
.write("<h2>%s</h2>"%head
)
431 def dumpHeadGraf(self
, graf
):
# Section header: drops the first " o " marker and leading whitespace, then
# opens a list item and a nested <ul>.
434 def dumpSectionHeader(self
, header
):
435 header
= header
.replace(" o ", "", 1).lstrip()
436 sys
.stdout
.write(" <li>%s\n"%header
)
437 sys
.stdout
.write(" <ul>\n")
# Closes the nested <ul> opened by dumpSectionHeader.
439 def dumpEndOfSection(self
):
440 sys
.stdout
.write(" </ul>\n\n")
442 def dumpEndOfChangelog(self
):
445 def dumpStartOfSections(self
):
448 def dumpEndOfSections(self
):
# Emits a spacer paragraph and the <!--break--> marker.
451 def dumpDrupalBreak(self
):
453 print "<p> </p>"
454 print "\n<!--break-->\n\n"
# dumpItem(grafs): removes the first " - " marker (and leading whitespace)
# from the item's first line -- modifying `grafs` in place -- then opens an
# <li> and writes the first paragraph with htmlText().
# NOTE(review): the handling of later paragraphs and the closing tag are not
# visible in this excerpt.
457 def dumpItem(self
, grafs
):
458 grafs
[0][0] = grafs
[0][0].replace(" - ", "", 1).lstrip()
459 sys
.stdout
.write(" <li>")
464 self
.htmlText(grafs
[0])
# Command-line interface.  Every option stores into the attribute named by
# `dest`; the defaults leave wrapping and sorting enabled and send output to
# "-".  Parsing itself happens after this block.
op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
# Formatting switches (both on by default; the flags turn them off).
op.add_option('-W', '--no-wrap', action='store_false',
              dest='wrapText', default=True,
              help='Do not re-wrap paragraphs')
op.add_option('-S', '--no-sort', action='store_false',
              dest='sort', default=True,
              help='Do not sort or collate sections')
# Output destination and format.
op.add_option('-o', '--output', dest='output',
              default='-', metavar='FILE', help="write output to FILE")
op.add_option('-H', '--html', action='store_true',
              dest='html', default=False,
              help="generate an HTML fragment")
op.add_option('-1', '--first', action='store_true',
              dest='firstOnly', default=False,
              help="write only the first section")
# Blog-oriented switches; --blog is a preset that the script expands after
# parsing.
op.add_option('-b', '--blog-header', action='store_true',
              dest='blogOrder', default=False,
              help="Write the header in blog order")
op.add_option('-B', '--blog', action='store_true',
              dest='blogFormat', default=False,
              help="Set all other options as appropriate for a blog post")
op.add_option('--inplace', action='store_true',
              dest='inplace', default=False,
              help="Alter the ChangeLog in place")
op.add_option('--drupal-break', action='store_true',
              dest='drupalBreak', default=False,
              help='Insert a drupal-friendly <!--break--> as needed')
# Main script: parse the command line, pick input/output, read and classify
# the changelog, then write it back out.
# NOTE(review): many interior lines are missing from this excerpt; the
# comments below describe only the visible statements.
495 options
,args
= op
.parse_args()
# --blog is a preset: it forces blog ordering, disables re-wrapping, limits
# output to the first section and enables the drupal break.
497 if options
.blogFormat
:
498 options
.blogOrder
= True
501 options
.wrapText
= False
502 options
.firstOnly
= True
503 options
.drupalBreak
= True
506 op
.error("Too many arguments")
# Presumably the --inplace case: no explicit output may have been given, and
# the input file name becomes the output name.
513 assert options
.output
== '-'
514 options
.output
= fname
# Input is read through sys.stdin, rebound to the named file.
517 sys
.stdin
= open(fname
, 'r')
# Choose the writer class (presumably by options.html -- condition not shown).
522 ChangeLogClass
= HTMLChangeLog
524 ChangeLogClass
= ChangeLog
526 CL
= ChangeLogClass(wrapText
=options
.wrapText
,
527 blogOrder
=options
.blogOrder
,
528 drupalBreak
=options
.drupalBreak
)
# Parsing loop over the input lines (body not visible).
531 for line
in sys
.stdin
:
# When writing to a file, output goes to "<output>.new" first and is renamed
# over the target at the end.
545 if options
.output
!= '-':
546 fname_new
= options
.output
+".new"
547 fname_out
= options
.output
548 sys
.stdout
= open(fname_new
, 'w')
550 fname_new
= fname_out
= None
553 CL
.collateAndSortSections()
557 if options
.firstOnly
:
560 if nextline
is not None:
# Copy the remaining input through unchanged.
563 for line
in sys
.stdin
:
564 sys
.stdout
.write(line
)
# NOTE(review): no close()/flush() of the replaced sys.stdout is visible
# before this rename -- confirm the output file is flushed first.
566 if fname_new
is not None:
567 os
.rename(fname_new
, fname_out
)