Update html-plain writer.
[docutils.git] / sandbox / rstdiff / rstdiff.py
blob9b6da11e1844a55b86fff23225a856d15e23f29e
1 #!/usr/bin/env python
3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 """
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
23 """
25 __docformat__ = 'reStructuredText'
# Use the locale of the user when available. This is best effort only: a
# missing or unsupported locale must never prevent the tool from running.
try:
    import locale
    locale.setlocale(locale.LC_ALL, '')
except Exception:
    # Deliberately ignored - but unlike a bare ``except:`` this lets
    # ``KeyboardInterrupt`` and ``SystemExit`` propagate
    pass
33 import os, re, sys
35 from pprint import pformat
36 from optparse import SUPPRESS_HELP
38 import docutils
39 from docutils import frontend, writers, nodes, SettingsSpec
40 from docutils.core import Publisher
41 from docutils.utils import SystemMessage, Reporter, new_reporter, new_document
42 from docutils.frontend import OptionParser, make_paths_absolute, validate_boolean
43 from docutils.transforms import Transform
45 from treediff import TreeMatcher, HashableNodeImpl
47 ###############################################################################
48 ###############################################################################
49 # Command line specification
# Description of the tool handed to the option parser
description = ("""Generates a structural diff from two reStructuredText input
documents and produces an annotated result. """)

# Name and default value of the option selecting the writer
writerOption = 'writer'
writerDefault = 'xml'
# Matches a complete ``--writer=<name>`` argument; the name is the first group
writerArgRE1 = '^--%s=(.*)$' % ( writerOption, )

# Names of the options selecting the input document(s) which the
# following options apply to
oldOption = 'old'
bothOption = 'both'
newOption = 'new'
def switchOptionsCallback(option, opt, value, parser, to):
    """Callback for `optparse` switching the target of following options.

    Hands the values collected by `parser` so far and `to` over to
    `switchOptions()`. `option`, `opt` and `value` are part of the
    callback signature of `optparse` but are not used.
    """
    collected = parser.values
    switchOptions(collected, to)
# Specification of the options of rstdiff itself: a title, a description
# (none) and one ( help text, option strings, keyword arguments ) entry
# per option
settings_spec = (
    'rstdiff options',
    None,
    (
        ('Select writer to write output with (default "xml").',
         [ '--' + writerOption, ],
         { }),
        # The following three options switch the input document(s)
        # which subsequent options apply to
        ('Following options apply to the old input document'
         ' (default: both input documents).',
         [ '--' + oldOption, ],
         { 'action': 'callback',
           'callback': switchOptionsCallback,
           'callback_args': ( oldOption, ),
           }),
        ('Following options apply to the new input document'
         ' (default: both input documents).',
         [ '--' + newOption, ],
         { 'action': 'callback',
           'callback': switchOptionsCallback,
           'callback_args': ( newOption, ),
           }),
        ('Following options apply to both input documents'
         ' (default).',
         [ '--' + bothOption, ],
         { 'action': 'callback',
           'callback': switchOptionsCallback,
           'callback_args': ( bothOption, ),
           }),
        # Both options on section comparison share the setting
        # ``compare_sections_by_names``
        ('Compare sections by comparing their names (default); '
         'useful when section titles are stable but sections change',
         [ '--compare-sections-by-names', ],
         { 'action': 'store_true',
           'default': 1, 'validator': validate_boolean}),
        ('Compare sections normally; useful when section titles change',
         [ '--compare-sections-normally', ],
         { 'action': 'store_false', 'dest': 'compare_sections_by_names'}),
        # Not shown in the help output
        (SUPPRESS_HELP, [ '--dump-rstdiff', ], {'action': 'store_true'}),
        ),
    )
# Defaults overriding those of other components; the writer option
# defaults to `writerDefault`
settings_defaults = {
    'output_encoding_error_handler': 'xmlcharrefreplace',
    writerOption: writerDefault,
    }

# Name of the configuration file section of this tool
config_section = 'rstdiff'

# Usage line for the help output
usage = '%prog [options]... <old> [<new> [<output>]]'
112 ###############################################################################
113 # Classes for three argument command lines
# Switchable options which may collect several values; `useOptions()`
# accumulates these in a list
switchableMultiOptions = (
    'strip_elements_with_classes',
    'strip_classes',
    )

# All settings which `switchOptions()` / `useOptions()` keep separately
# for the old and the new input document
switchableOptions = (
    'title',
    'generator',
    'datestamp',
    'source_link',
    'source_url',
    'toc_backlinks',
    'footnote_backlinks',
    'sectnum_xform',
    'doctitle_xform',
    'docinfo_xform',
    'sectsubtitle_xform',
    'strip_comments',
    'input_encoding',
    'input_encoding_error_handler',
    'language_code',
    'pep_references',
    'pep_base_url',
    'pep_file_url_template',
    'rfc_references',
    'rfc_base_url',
    'trim_footnote_reference_space',
    'file_insertion_enabled',
    'raw_enabled',
    'auto_id_prefix',
    'id_prefix',
    ) + switchableMultiOptions
def switchOptions(values, to):
    """Switch `values` so following options apply to input document `to`.

    The switchable options currently set as attributes of `values` are
    moved to the dictionary of the target active so far (attribute
    ``_old``, ``_new`` or ``_both`` of `values`) and the options saved
    earlier for `to` are restored as attributes. ``values._optionsTo``
    records the attribute name of the target which is active afterwards.
    """
    lastTo = getattr(values, '_optionsTo', '_' + bothOption)
    lastTarget = getattr(values, lastTo, None)
    if not lastTarget:
        lastTarget = {}
        setattr(values, lastTo, lastTarget)
    target = getattr(values, '_' + to, None)
    if not target:
        # Nothing has been saved for `to` so nothing needs to be
        # restored. The empty dictionary is only read below so it is not
        # stored at all: Storing it as attribute `to` would create a
        # bogus setting named ``old`` / ``new`` / ``both`` and storing
        # it as ``'_' + to`` would replace `lastTarget` when `to` is
        # active already and thereby lose the options saved there.
        target = {}
    for opt in switchableOptions:
        if hasattr(values, opt):
            # Save last option
            lastTarget[opt] = getattr(values, opt)
            delattr(values, opt)
        if opt in target:
            # Restore old option
            setattr(values, opt, target[opt])
    values._optionsTo = '_' + to
def useOptions(values, to):
    """Set `values` so use options applying to input document `to`.

    Every switchable option is removed from `values` and set again from
    the options saved for `to` and for both input documents - in this
    order. A normal option takes the first value found. An option which
    may have multiple values becomes a list collecting the values of all
    places where it is found.
    """
    sources = ( '_' + to, '_' + bothOption, )
    for opt in switchableOptions:
        if hasattr(values, opt):
            delattr(values, opt)
        isMulti = opt in switchableMultiOptions
        for src in sources:
            if not hasattr(values, src):
                continue
            saved = getattr(values, src)
            if opt not in saved:
                continue
            if not isMulti:
                # First value found wins
                setattr(values, opt, saved[opt])
                break
            if not hasattr(values, opt):
                setattr(values, opt, [])
            if saved[opt] is not None:
                getattr(values, opt).extend(saved[opt])
class Publisher3Args(Publisher):
    """A `Publisher` parsing its command line with `OptionParser3Args`."""

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """Create and return the `OptionParser3Args` for this publisher.

        If `config_section` is given it is set on `settings_spec` which
        is created if necessary. The option parser is built from the
        parser, reader and writer of this publisher and `settings_spec`.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            words = config_section.split()
            if len(words) > 1 and words[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        components = ( self.parser, self.reader, self.writer, settings_spec, )
        return OptionParser3Args(
            components=components,
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
class OptionParser3Args(OptionParser):
    """An `OptionParser` for command lines with up to three arguments."""

    def check_values(self, values, args):
        """Store positional arguments as runtime settings.

        The arguments checked by `check_args()` end up in
        ``values._old_source``, ``values._new_source`` and
        ``values._destination``. Returns `values`.
        """
        # Complete a possible switch
        switchOptions(values, bothOption)
        sources = self.check_args(args)
        ( values._old_source, values._new_source,
          values._destination, ) = sources
        make_paths_absolute(values.__dict__, self.relative_path_settings,
                            os.getcwd())
        values._config_files = self.config_files
        return values

    def check_args(self, args):
        """Check `args` and return ( old source, new source, destination ).

        Up to three arguments are consumed from `args`. A missing
        argument or an argument ``-`` results in ``None`` meaning stdin
        for a source or stdout for the destination. Problems are
        reported through ``self.error()``.
        """
        if not args:
            self.error('At least 1 argument required.')
        # Old source, new source and destination in this order
        names = [ None, None, None, ]
        for position in range(len(names)):
            if not args:
                break
            name = args.pop(0)
            if name != '-':
                names[position] = name
        if args:
            self.error('Maximum 3 arguments allowed.')
        old_source, new_source, destination = names
        if old_source is None and new_source is None:
            self.error('Old and new source may not both use stdin.')
        clobbersOld = old_source and old_source == destination
        if clobbersOld or new_source and new_source == destination:
            self.error('Do not specify the same file for both source and '
                       'destination. It will clobber the source file.')
        return old_source, new_source, destination
225 ###############################################################################
226 ###############################################################################
227 # Helpers
class Opcode(object):
    """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`

    An opcode is kept as a list of command, begin and end of the old
    range and begin and end of the new range. A 'descend' opcode has the
    list of sub-opcodes as an additional sixth element.
    """

    Replace = 'replace'
    Delete = 'delete'
    Insert = 'insert'
    Equal = 'equal'
    Descend = 'descend'

    _tuple = None

    def __init__(self, opcodeTuple):
        """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`"""
        self._tuple = list(opcodeTuple)

    def getCommand(self):
        """Return the command."""
        return self._tuple[0]

    def getOldRange(self):
        """Returns the range pertaining to an old list."""
        return ( self._tuple[1], self._tuple[2], )

    def getNewRange(self):
        """Returns the range pertaining to a new list."""
        return ( self._tuple[3], self._tuple[4], )

    def getSubOpcodes(self):
        """Return the sub-opcodes in case of `command` == 'descend' or
        `None`."""
        if self._tuple[0] != self.Descend:
            return None
        return self._tuple[5]

    def resolveOpcode(self, oldList, newList):
        """Resolves opcode pertaining to `oldList` and `newList`. Returns tuple
        consisting of

        command
          Same as self.getCommand().

        oldRange
          The range of elements in `oldList` affected by the opcode.

        newRange
          The range of elements in `newList` affected by the opcode.

        subOpcodes
          Same as self.getSubOpcodes().
        """
        ( oldBegin, oldEnd, ) = self.getOldRange()
        ( newBegin, newEnd, ) = self.getNewRange()
        return ( self.getCommand(), oldList[oldBegin:oldEnd],
                 newList[newBegin:newEnd], self.getSubOpcodes())

    def setSubOpcodes(self, opcodes):
        """Set the sub-opcodes to a new list.

        Raises `TypeError` if this is not a 'descend' opcode.
        """
        if self._tuple[0] != self.Descend:
            raise TypeError("Can not set subopcodes of a %r opcode"
                            % ( self._tuple[0], ))
        self._tuple[5] = opcodes

    def setCommand(self, command):
        """Set a new command adapting subopcodes.

        Changing to 'descend' adds an empty list of sub-opcodes, changing
        from 'descend' drops the sub-opcodes.
        """
        if self._tuple[0] == command:
            return
        # Start from the five common elements. An opcode which was no
        # 'descend' so far has no sixth element so assigning to index 5
        # would raise an `IndexError`.
        self._tuple = self._tuple[0:5]
        self._tuple[0] = command
        if command == self.Descend:
            self._tuple.append([ ])

    def setOldRange(self, range):
        """Sets the range pertaining to an old list."""
        ( self._tuple[1], self._tuple[2], ) = range

    def setNewRange(self, range):
        """Sets the range pertaining to a new list."""
        ( self._tuple[3], self._tuple[4], ) = range

    def asTuple(self):
        """Return the opcode as a tuple."""
        return tuple(self._tuple)
313 ###############################################################################
314 ###############################################################################
315 # Additional docutils stuff
317 ###############################################################################
318 # Node types
class White(nodes.Text):
    """A piece of text containing only whitespace."""

    tagname = '#white'

    # A regular expression matching strings for this class and returning
    # them as the first match.
    # TODO Could be subject to an option
    re = r'(\s+)'
class Word(nodes.Text):
    """A piece of text containing exactly one word."""

    tagname = '#word'

    @staticmethod
    def splitText(text):
        """Splits text and returns a sequence of `Word` and `White`
        objects. Returns an empty sequence for an empty `text`.

        `text` must offer ``astext()``. The result alternates between
        `Word` and `White` starting with a `Word`. A text starting or
        ending with whitespace therefore results in an empty `Word` at
        the respective end.
        """
        content = text.astext()
        if not content:
            # Splitting an empty string results in one empty piece - not
            # in the empty sequence promised
            return [ ]
        # `White.re` contains a group so the separators are part of the
        # result: Pieces with an even index are the text between
        # whitespace, pieces with an odd index are the whitespace
        pieces = re.split(White.re, content)
        result = [ ]
        for index in range(len(pieces)):
            if index % 2:
                result.append(White(pieces[index]))
            else:
                result.append(Word(pieces[index]))
        return result
353 ###############################################################################
354 # Transformers
class Text2Words(Transform):
    """Transforms a `Text` node into a sequence of `Word`/`White`."""

    def apply(self):
        """Walk the document with a `Text2WordsVisitor`."""
        visitor = Text2WordsVisitor(self.document)
        self.document.walk(visitor)
class Text2WordsVisitor(nodes.SparseNodeVisitor):
    """Visitor replacing each `Text` node by `Word`/`White` nodes."""

    def visit_Text(self, text):
        """Replace `text` in its parent by the pieces it is split into.

        Raises `ValueError` if `text` can not be found in its parent.
        """
        words = Word.splitText(text)
        if not words:
            # An empty text
            words = [ White(''), ]
        parent = text.parent
        # `Text` nodes compare equal by their value. Finding `text` by
        # equality - as ``parent.replace()`` does - may hit a sibling
        # with the same content instead so look for the very object.
        for index, child in enumerate(parent):
            if child is text:
                parent[index:index + 1] = words
                break
        else:
            raise ValueError("Can not find %r in its parent" % ( text, ))
class Words2Text(Transform):
    """Transforms a sequence of `Word`/`White` into a `Text` node."""

    def apply(self):
        """Walk the document with a `Words2TextVisitor`."""
        visitor = Words2TextVisitor(self.document)
        self.document.walk(visitor)
class Words2TextVisitor(nodes.SparseNodeVisitor):
    """Visitor joining each sequence of text nodes to a single `Text`."""

    def visit_Text(self, text):
        """Replace the sequence of text nodes ending in `text` by one
        `Text` node.

        Nothing happens unless `text` is the last node of its sequence.
        Raises `IndexError` if `text` can not be found in its parent.
        """
        parent = text.parent
        # Find this node and the first node of the sequence it belongs to
        first = None
        for index, child in enumerate(parent):
            if not isinstance(child, nodes.Text):
                first = None
            elif first is None:
                first = index
            # ``parent.index(text)`` uses value equality - can not be
            # used here to find `text`
            if child is text:
                end = index + 1
                break
        else:
            raise IndexError("Can not find %r in its parent" % ( text, ))

        if len(parent) > end and isinstance(parent[end], nodes.Text):
            # The visitor processes following children even if they are
            # deleted - so work for last node of a sequence
            return

        # ``"".join()`` instead of ``reduce()`` which is no builtin in
        # Python 3
        joined = "".join([ node.astext() for node in parent[first:end] ])
        parent[first:end] = ( nodes.Text(joined), )

    visit_White = visit_Text

    visit_Word = visit_Text
class Generated2Inline(Transform):
    """Transforms a `generated` node into an `inline` node."""

    def apply(self):
        """Walk the document with a `Generated2InlineVisitor`."""
        visitor = Generated2InlineVisitor(self.document)
        self.document.walk(visitor)
class Generated2InlineVisitor(nodes.SparseNodeVisitor):
    """Visitor replacing each `generated` node by an `inline` node."""

    def visit_generated(self, generated):
        """Replace `generated` in its parent by an `inline` node.

        The `inline` node gets the text of the first child of
        `generated`, the remaining children and the attributes of
        `generated`.
        """
        children = generated.children
        text = ''
        if children:
            text = children[0].astext()
        # Raw source and text are the first positional parameters of a
        # text element. They are given explicitly so the remaining
        # children are not taken for them.
        inline = nodes.inline('', text, *children[1:],
                              **generated.attributes)
        generated.parent.replace(generated, inline)
423 ###############################################################################
424 ###############################################################################
425 # Hashable
427 class DocutilsDispatcher(HashableNodeImpl):
428 """Implements hashable for a docutils `Node` and supports construction."""
430 reporter = None
def __init__(self, reporter):
    """Initialize for nodes of type `nodes.Node`. `reporter` is used
    for debug output."""
    # The class must be named explicitly: ``super(self.__class__, self)``
    # results in an endless recursion as soon as this class is subclassed
    super(DocutilsDispatcher, self).__init__(nodes.Node)
    self.reporter = reporter
def dispatchClass(self, function, node, *args):
    """Call the method implementing `function` for the class of `node`.

    The method is named `function` followed by an underscore and the
    name of the class of `node`. If there is no such method the one for
    the imaginary class "UNKNOWN" is used. The method is called with
    `node` and `args` and its result is returned. The call, its
    arguments and its result are sent to the debug output of the
    reporter.
    """
    pat = "%s_%%s" % ( function, )
    try:
        name = pat % ( node.__class__.__name__, )
        method = getattr(self, name)
    except AttributeError:
        name = pat % ( 'UNKNOWN', )
        method = getattr(self, name)
    debug = self.reporter.debug
    arguments = ( node, ) + args
    typeNames = [ arg.__class__.__name__ for arg in arguments ]
    debug("*** %s(%s)" % ( name, ", ".join(typeNames), ))
    for arg in arguments:
        try:
            debug("    > %s" % ( arg, ))
        except UnicodeEncodeError:
            debug("    > CANNOT OUTPUT ARGUMENT OF TYPE %s"
                  % ( type(arg), ))
    result = method(node, *args)
    try:
        debug("    < %s" % ( result, ))
    except UnicodeEncodeError:
        debug("    < CANNOT OUTPUT RESULT OF TYPE %s"
              % ( type(result), ))
    return result
465 ###########################################################################
466 ###########################################################################
467 # Implementation of abstract methods for `HashableNodeImpl`
def rootHash(self, node):
    """Return a hash for `node` as a root only. Dispatches on the class
    of `node`."""
    return self.dispatchClass('rootHash', node)

def rootHash_UNKNOWN(self, node):
    """By default the hash of a root is the hash of its class."""
    nodeClass = node.__class__
    return hash(nodeClass)
def rootEq(self, node, other):
    """Returns root equality of `node` and an `other` node. ``True`` if
    the two nodes as roots are equal without considering their
    children. This should be true if one node can be replaced by
    the other and all changes can be represented without changing
    the node itself. Dispatches on the class of `node` if both nodes
    are of the same class."""
    # Only nodes of the same class can be equal - this assumption
    # is used in many places
    sameClass = not (node.__class__ != other.__class__)
    if not sameClass:
        return False
    return self.dispatchClass('rootEq', node, other)

def rootEq_UNKNOWN(self, node, other):
    """By default two roots of the same class are equal."""
    # Unless we know better two roots of the same type are considered equal
    return True
def childHash(self, node):
    """Return a hash for `node` as a child. Dispatches on the class of
    `node`."""
    return self.dispatchClass('childHash', node)

def childHash_UNKNOWN(self, node):
    """By default the hash of a child is the hash of its children."""
    # By default compare as a child by comparing children
    childrenHash = self.childrenHash(node)
    return childrenHash
def childEq(self, node, other):
    """Returns equality of `node` and an `other` node as children.
    ``True`` if the child features of the two nodes are equal
    without considering the root. Dispatches on the class of `node` if
    both nodes are of the same class."""
    # Only nodes of the same class can be equal - this assumption
    # is used in many places
    sameClass = not (node.__class__ != other.__class__)
    if not sameClass:
        return False
    return self.dispatchClass('childEq', node, other)

def childEq_UNKNOWN(self, node, other):
    """By default two children are equal if their children are equal."""
    # By default compare as a child by comparing children
    childrenEqual = self.childrenEq(node, other)
    return childrenEqual
def getChildren(self, node):
    """Return the children of `node` as a list. Dispatches on the class
    of `node`."""
    return self.dispatchClass('getChildren', node)

def getChildren_UNKNOWN(self, node):
    """By default the children are those in ``node.children``."""
    children = node.children
    return children
525 ###########################################################################
526 ###########################################################################
527 # Merging
# The types of changes; `setNewType()` marks a node by a class named
# "change-" followed by the type
# TODO The resulting class names should be configurable
NewDelete = 'removed'
NewInsert = 'added'
NewReplaced = 'replaced'
NewReplacement = 'replacement'
def copyRoot(self, node):
    """Copy `node` as root and return it. Dispatches on the class of
    `node`."""
    return self.dispatchClass('copyRoot', node)

def copyRoot_UNKNOWN(self, node):
    """By default a root is copied by its ``copy()`` method."""
    duplicate = node.copy()
    return duplicate
def addChild(self, root, child):
    """Add `child` to `root`. Dispatches on the class of `root`."""
    return self.dispatchClass('addChild', root, child)

def addChild_UNKNOWN(self, root, child):
    """By default a child is appended to `root`."""
    root.append(child)
def copyChild(self, node, newType):
    """Copy `node` as child and return it. `newType` is ``None`` for an
    unchanged child or the change type. Dispatches on the class of
    `node`."""
    return self.dispatchClass('copyChild', node, newType)

def copyChild_UNKNOWN(self, node, newType):
    """By default a child is copied deeply and marked with `newType`."""
    duplicate = node.deepcopy()
    return self.setNewType(duplicate, newType)
def copyChildren(self, head, tail, root, newType):
    """Return a range of new nodes copied from [ `head` ] + `tail` under
    `root`. `tail` are all the same class as `head`. Nodes are
    created appropriate to type `newType`. Dispatches on the class of
    `head`."""
    return self.dispatchClass('copyChildren', head, tail, root, newType)

def copyChildren_UNKNOWN(self, head, tail, root, newType):
    """By default every node is copied by `copyChild()`."""
    result = [ ]
    for child in [ head, ] + tail:
        result.append(self.copyChild(child, newType))
    return result
def copyRange(self, root, children, newType):
    """Return a range of new nodes copied from `children` under `root`.
    Nodes are created appropriate to type `newType`.

    Consecutive children of the same class - or being all text nodes -
    are handed to `copyChildren()` together.
    """
    result = [ ]
    count = len(children)
    begin = 0
    while begin < count:
        first = children[begin]
        # Extend the run as long as the children fit to `first`
        end = begin + 1
        while (end < count
               and (first.__class__ == children[end].__class__
                    or (isinstance(first, nodes.Text)
                        and isinstance(children[end], nodes.Text)))):
            end += 1
        tail = children[begin + 1:end]
        result.extend(self.copyChildren(first, tail, root, newType))
        begin = end
    return result
def mergeChildren(self, diffRoot, oldRoot, newRoot,
                  command, oldRange, newRange):
    """Add children to `diffRoot` merging children `oldRange` / `newRange`
    of `oldRoot` / `newRoot` by `command`.

    Equal children are copied from `oldRange` unchanged. Inserted
    children are copied from `newRange` and deleted children from
    `oldRange`. A replacement adds the replaced children from
    `oldRange` followed by their replacement from `newRange`. Raises
    `TypeError` for any other command.
    """
    if command == Opcode.Equal:
        for old in oldRange:
            self.addChild(diffRoot, self.copyChild(old, None))
        return

    if command == Opcode.Insert:
        copies = ( ( newRoot, newRange, self.NewInsert, ), )
    elif command == Opcode.Delete:
        copies = ( ( oldRoot, oldRange, self.NewDelete, ), )
    elif command == Opcode.Replace:
        # TODO Replacement doubles elements. This needs to be
        # reflected properly in the @ids. If the @ids don't change
        # there need to be unique @ids for replaced elements. This
        # needs also to be reflected in referring @refid and
        # @backrefs.
        copies = ( ( oldRoot, oldRange, self.NewReplaced, ),
                   ( newRoot, newRange, self.NewReplacement, ), )
    else:
        raise TypeError("Unhandled command %r" % ( command, ))

    for ( srcRoot, srcRange, newType, ) in copies:
        for newChild in self.copyRange(srcRoot, srcRange, newType):
            self.addChild(diffRoot, newChild)
620 ###########################################################################
621 ###########################################################################
622 # Helpers
def setNewType(self, node, newType):
    """Set a class on `node` for `newType` if set. Returns `node`.

    The class is named "change-" followed by `newType`.
    """
    if not newType:
        return node
    node['classes'].append("change-%s" % ( newType, ))
    return node
630 ###########################################################################
631 ###########################################################################
632 # Real comparison and merging
634 # The idea is like this: Each node has attributes which need to be
635 # compared as root and it has attributes which need to be compared
636 # as child. This is different for every node type.
638 # Similarly each node type may need special methods for cloning
639 # and merging.
641 ###########################################################################
642 # Text / Word / White
def rootHash_Text(self, node):
    """A text is hashed by its content."""
    content = node.astext()
    return hash(content)

rootHash_Word = rootHash_Text

def rootHash_White(self, node):
    """All whitespace has the same hash."""
    # Whitespace compares all equal
    return hash('')

def rootEq_Text(self, node, other):
    """Texts are equal if their content is equal."""
    return node.astext() == other.astext()

rootEq_Word = rootEq_Text

def rootEq_White(self, node, other):
    """All whitespace is equal."""
    # TODO Must behave different for places where whitespace
    # differences are relevant
    return True

# Text behaves the same as root or child

childHash_Text = rootHash_Text
childHash_Word = rootHash_Word
childHash_White = rootHash_White

childEq_Text = rootEq_Text
childEq_Word = rootEq_Word
childEq_White = rootEq_White
def copyChildren_Text(self, head, tail, root, newType):
    """Return an `inline` node marked with `newType` and containing
    `head` and `tail`. Returns nothing for a single empty text."""
    isSingle = not tail
    if isSingle and isinstance(head, nodes.Text) and not head.astext():
        # Do not create empty inlines
        return [ ]
    inline = nodes.inline()
    self.setNewType(inline, newType)
    members = [ head, ] + tail
    inline.extend(members)
    return [ inline, ]

# Sequences of Text are treated together
copyChildren_Word = copyChildren_Text
copyChildren_White = copyChildren_Text
686 ###########################################################################
687 # section
def getSectionName(self, node):
    """Return the best name for `node`.

    This is the first entry of the first non-empty attribute among
    "dupnames", "names" and "ids" or the empty string if there is none.
    """
    for attribute in ( 'dupnames', 'names', 'ids', ):
        candidates = node[attribute]
        if candidates:
            return candidates[0]
    return '' # No idea...
def rootEq_section(self, node, other):
    """Compare sections by their names or normally.

    The setting ``compare_sections_by_names`` of the document of `node`
    decides. Compared normally all sections are equal as roots.
    """
    settings = node.document.settings
    if not settings.compare_sections_by_names:
        return True
    return self.getSectionName(node) == self.getSectionName(other)
705 ###########################################################################
706 # For some elements their attributes need to be considered to
707 # detect changes.
def attributeEq(self, node, other, attribute):
    """Return whether `attribute` is the same for `node` and `other`.

    This is the case if neither of them has the attribute or if both
    have it with equal values.
    """
    inNode = attribute in node
    inOther = attribute in other
    if inNode != inOther:
        return False
    if not inNode:
        return True
    return node[attribute] == other[attribute]
716 ###########################################################################
717 # reference
def rootEq_reference(self, node, other):
    """References are equal as roots if their "refuri" is the same."""
    sameUri = self.attributeEq(node, other, 'refuri')
    return sameUri

###########################################################################
# target

def rootEq_target(self, node, other):
    """Targets are equal as roots if their "refuri" is the same."""
    sameUri = self.attributeEq(node, other, 'refuri')
    return sameUri
728 ###########################################################################
729 # bullet_list
731 # TODO This is typically a minor change and should be requested by
732 # a special option
def attributeEq_bullet_list(self, node, other):
    """Return whether the attribute relevant for bullet lists is the
    same for `node` and `other`. This is "bullet"."""
    return self.attributeEq(node, other, 'bullet')

def rootEq_bullet_list(self, node, other):
    """Bullet lists are equal as roots if their relevant attribute is
    the same."""
    return self.attributeEq_bullet_list(node, other)

def childEq_bullet_list(self, node, other):
    """Bullet lists are equal as children if their relevant attribute
    is the same and their children are equal."""
    result = self.attributeEq_bullet_list(node, other)
    if result:
        result = self.childrenEq(node, other)
    return result
744 ###########################################################################
745 # enumerated_list
747 # TODO This is typically a minor change and should be requested by
748 # a special option
def attributeEq_enumerated_list(self, node, other):
    """Return whether the attributes relevant for enumerated lists are
    the same for `node` and `other`. These are "enumtype", "prefix",
    "suffix" and "start"."""
    result = True
    for attribute in ( 'enumtype', 'prefix', 'suffix', 'start', ):
        result = self.attributeEq(node, other, attribute)
        if not result:
            break
    return result

def rootEq_enumerated_list(self, node, other):
    """Enumerated lists are equal as roots if their relevant attributes
    are the same."""
    return self.attributeEq_enumerated_list(node, other)

def childEq_enumerated_list(self, node, other):
    """Enumerated lists are equal as children if their relevant
    attributes are the same and their children are equal."""
    result = self.attributeEq_enumerated_list(node, other)
    if result:
        result = self.childrenEq(node, other)
    return result
763 ###########################################################################
764 # image
def rootEq_image(self, node, other):
    """Images are equal as roots if they are of the same class and
    their "uri" is the same."""
    if node.__class__ != other.__class__:
        return False
    sameUri = self.attributeEq(node, other, 'uri')
    return sameUri
771 ###########################################################################
772 # Some elements may contain only #PCDATA. They need to propagate
773 # changes in their children up to the element itself.
def rootEqWithChildren(self, node, other):
    """Return whether `node` and `other` are of the same class and
    their children are equal. Used as root equality so a change in the
    children changes the element itself."""
    if node.__class__ != other.__class__:
        return False
    childrenEqual = self.childrenEq(node, other)
    return childrenEqual
###########################################################################
# comment

# The root equality of this and the following elements is
# `rootEqWithChildren()` so it depends on their children.
rootEq_comment = rootEqWithChildren

###########################################################################
# literal

rootEq_literal = rootEqWithChildren

###########################################################################
# option_string

rootEq_option_string = rootEqWithChildren

###########################################################################
# label

# TODO This is typically a minor change and should be requested by
# a special option

rootEq_label = rootEqWithChildren

###########################################################################
# footnote_reference

# TODO This is typically a minor change and should be requested by
# a special option

rootEq_footnote_reference = rootEqWithChildren

###########################################################################
# citation_reference

# TODO This is typically a minor change and should be requested by
# a special option

rootEq_citation_reference = rootEqWithChildren
819 ###########################################################################
820 # For some elements their attributes need to be considered to
821 # detect changes *and* they may contain only #PCDATA.
823 ###########################################################################
824 # option_argument
826 # TODO This is typically a minor change and should be requested by
827 # a special option
def attributeEq_option_argument(self, node, other):
    """Return whether the attribute relevant for option arguments is
    the same for `node` and `other`. This is "delimiter"."""
    return self.attributeEq(node, other, 'delimiter')

def rootEq_option_argument(self, node, other):
    """Option arguments are equal as roots if their relevant attribute
    is the same and `rootEqWithChildren()` holds."""
    result = self.attributeEq_option_argument(node, other)
    if result:
        result = self.rootEqWithChildren(node, other)
    return result

def childEq_option_argument(self, node, other):
    """Option arguments are equal as children if their relevant
    attribute is the same and their children are equal."""
    result = self.attributeEq_option_argument(node, other)
    if result:
        result = self.childrenEq(node, other)
    return result
840 ###########################################################################
841 # A change in certain elements must propagate the change up since
842 # they may occur only once. Must be done by parents.
def rootEqWithChild(self, node, other, childClass):
    """Checks whether `node` and `other` have both a node of type
    `childClass` and whether the first of those are equal as children.

    Returns ``False`` for nodes of different classes and ``True`` if
    one of the nodes has no child of type `childClass`.
    """
    if node.__class__ != other.__class__:
        return False

    # Find the first child of the class wanted in `node` and `other`
    found = [ ]
    for parent in ( node, other, ):
        match = None
        for child in self.getChildren(parent):
            if isinstance(child, childClass):
                match = child
                break
        found.append(match)
    ( nodeFound, otherFound, ) = found

    if nodeFound is None or otherFound is None:
        return True

    return self.childEq(nodeFound, otherFound)
867 ###########################################################################
868 # footnote
def rootEq_footnote(self, node, other):
    """Footnotes are compared as roots by their first label."""
    labelClass = nodes.label
    return self.rootEqWithChild(node, other, labelClass)

###########################################################################
# citation

def rootEq_citation(self, node, other):
    """Citations are compared as roots by their first label."""
    labelClass = nodes.label
    return self.rootEqWithChild(node, other, labelClass)

###########################################################################
# option

def rootEq_option(self, node, other):
    """Options are compared as roots by their first option string."""
    optionStringClass = nodes.option_string
    return self.rootEqWithChild(node, other, optionStringClass)
885 ###########################################################################
886 # Some attributes of some elements depend on their concrete parents.
888 # tgroup
def copyRoot_tgroup(self, node):
    """Return a copy of `node` with "cols" reset to zero. The original
    value is kept as "origcols"."""
    duplicate = node.copy()
    columns = duplicate['cols']
    duplicate['origcols'] = columns
    duplicate['cols'] = 0
    return duplicate
def addChild_tgroup(self, root, child):
    """Append `child` to `root` maintaining the number of columns.

    "cols" of `root` counts the `colspec` nodes added. When the `tbody`
    is added and the number of columns differs from the original one
    kept in "origcols" all columns get the same width. "origcols" is
    removed then.
    """
    root.append(child)
    # This works only if for each column there is a `colspec`. Is
    # this the case?
    if isinstance(child, nodes.colspec):
        root['cols'] += 1
    elif isinstance(child, nodes.tbody):
        # All columns seen - check the column widths
        if root['origcols'] != root['cols']:
            # Floor division keeps the width an integer even where ``/``
            # is a true division
            width = 100 // root['cols']
            for elem in root:
                if isinstance(elem, nodes.colspec):
                    elem['colwidth'] = width
        del root['origcols']
909 # TODO Number of entries must change according to the (changed)
910 # number of columns; for added or removed columns entries of *one*
911 # column must be added / removed
913 ###############################################################################
914 ###############################################################################
915 # Main
def processCommandLine():
    """Process command line and return a `Publisher`.

    The writer must be known before the command line is parsed so the
    writer option is looked up in ``sys.argv`` beforehand. Both forms
    ``--writer=<name>`` and ``--writer <name>`` are recognized. A
    mismatch between the writer found this way and the one in the
    parsed settings is reported as severe.
    """
    # Determine writer here so options can be given normally
    preWriter = writerDefault
    separateForm = '--' + writerOption
    arguments = sys.argv
    for index, arg in enumerate(arguments):
        match = re.search(writerArgRE1, arg)
        if match:
            preWriter = match.group(1)
        elif arg == separateForm and index + 1 < len(arguments):
            # The name of the writer is given as the next argument
            preWriter = arguments[index + 1]

    pub = Publisher3Args()
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_writer(preWriter)

    settingsSpec = SettingsSpec()
    settingsSpec.settings_spec = settings_spec
    settingsSpec.settings_defaults = settings_defaults
    pub.process_command_line(usage=usage, description=description,
                             settings_spec=settingsSpec,
                             config_section=config_section)
    if pub.settings.writer != preWriter:
        new_reporter('<cmdline>',
                     pub.settings).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
                                          % ( preWriter, pub.settings.writer, ))
    pub.set_destination()
    return pub
def readTree(pub, sourceName):
    """Read and return a tree from `sourceName`.

    The document is read by the reader of `pub` which is set anew
    before. The transforms of `pub` are applied to the document.
    """
    # Reset reader - just in case it keeps state from a previous invocation
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_source(None, sourceName)
    # Forget a document read before
    pub.document = None
    document = pub.reader.read(pub.source, pub.parser, pub.settings)
    pub.document = document
    pub.apply_transforms()
    return pub.document
def doDiff(hashableNodeImpl, oldTree, newTree):
    """Create a difference from `oldTree` to `newTree` using
    `hashableNodeImpl`. Returns the opcodes necessary to transform
    `oldTree` to `newTree`."""
    def isWhite(node):
        # Handed to the matcher as fourth argument
        return isinstance(node, White)

    matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree, isWhite)
    opcodes = matcher.get_opcodes()
    return opcodes
def buildDocument(oldTree, newTree, settings):
    """Return a new, empty document to take up the result of converting
    `oldTree` to `newTree`.

    The document is named after both sources found in `settings`.
    Raises `TypeError` if one of the trees is not rooted in a
    document."""
    for root in ( oldTree, newTree, ):
        if not isinstance(root, docutils.nodes.document):
            raise TypeError("Roots of trees must be documents")

    diffSource = u"%s => %s" % ( settings._old_source,
                                 settings._new_source, )
    return new_document(diffSource, settings)
def buildTree(dispatcher, diffRoot, opcodes, oldRoot, newRoot):
    """Add a new sub-tree under `diffRoot` which converts the children
    of `oldRoot` to those of `newRoot` as described by `opcodes`.

    All access to the trees goes through `dispatcher`."""
    oldChildren = dispatcher.getChildren(oldRoot)
    newChildren = dispatcher.getChildren(newRoot)
    for rawOpcode in opcodes:
        resolved = Opcode(rawOpcode).resolveOpcode(oldChildren, newChildren)
        ( command, oldRange, newRange, subOpcodes, ) = resolved
        if command != Opcode.Descend:
            # Anything but a descend is merged directly under `diffRoot`
            dispatcher.mergeChildren(diffRoot, oldRoot, newRoot,
                                     command, oldRange, newRange)
            continue

        # Descend: copy the old node as the new root of the sub-tree
        # and build its content recursively
        oldChild = oldRange[0]
        diffChild = dispatcher.copyRoot(oldChild)
        dispatcher.addChild(diffRoot, diffChild)
        buildTree(dispatcher, diffChild,
                  subOpcodes, oldChild, newRange[0])
# A replacement in certain elements must not be propagated up since
# they may occur only once and replacement would double them
replaceNotUp = ( nodes.title, nodes.subtitle, nodes.term, nodes.field_name,
                 nodes.attribution, nodes.caption, # (%text.model)
                 nodes.header, nodes.footer, nodes.definition,
                 nodes.field_body, nodes.description, nodes.legend,
                 nodes.entry, # (%body.elements;+) or (%body.elements;*)
                 nodes.decoration, nodes.docinfo, nodes.transition,
                 nodes.option_group, nodes.thead,
                 nodes.tbody, # different content model
                 )
# A replacement in certain elements normally not subject to up
# propagation and contained in certain elements may propagate up if
# all their siblings are also replacements and would propagate up.
# Each entry is a pair of ( element class, parent class, ).
replaceUpSiblings = (
    ( nodes.title, nodes.section, ),
    ( nodes.subtitle, nodes.section, ),
    ( nodes.term, nodes.definition_list_item, ),
    ( nodes.field_name, nodes.field, ),
    ( nodes.attribution, nodes.block_quote, ),
    ( nodes.caption, nodes.figure, ),
    ( nodes.definition, nodes.definition_list_item, ),
    ( nodes.field_body, nodes.field, ),
    ( nodes.description, nodes.option_list_item, ),
    ( nodes.legend, nodes.figure, ),
    ( nodes.option_group, nodes.option_list_item, ),
    )
# TODO If much text is replaced in a text element the whole element
# should be replaced. This makes more sense to people than two large
# replaced/replacement blocks where the only equality is in words like
# "the". The exact meaning of "much" should be an option.
def cleanOpcodes(opcodes, dispatcher, oldList, newList):
    """Replace some nasty results in `opcodes` by cleaner versions.

    `opcodes` create `newList` from `oldList` and are changed in place;
    nothing is returned. For every opcode with sub-opcodes

    * the sub-opcodes are cleaned recursively,

    * adjacent sub-opcodes of the same type (except descends) are
      merged into one,

    * a single remaining sub-opcode has its command propagated up to
      the opcode unless it is a descend or a replacement in an element
      listed in `replaceNotUp`.

    Replacements held back because of `replaceNotUp` are propagated up
    nevertheless if the element is listed with its parent in
    `replaceUpSiblings` and all other entries of `opcodes` are
    replacements or such candidates as well."""
    replaceUpCandidates = [ ]
    for ( index, rawOpcode, ) in enumerate(opcodes):
        opcode = Opcode(rawOpcode)
        ( command, oldRange, newRange, subOpcodes,
          ) = opcode.resolveOpcode(oldList, newList)
        if not subOpcodes:
            # Nothing to clean for flat or empty opcodes
            continue

        oldNode = oldRange[0]
        newNode = newRange[0]
        cleanOpcodes(subOpcodes, dispatcher, dispatcher.getChildren(oldNode),
                     dispatcher.getChildren(newNode))

        # Merge adjacent opcodes of same type; the position advances
        # only if nothing was merged since a merge shortens the list
        pos = 1
        while pos < len(subOpcodes):
            left = Opcode(subOpcodes[pos - 1])
            right = Opcode(subOpcodes[pos])
            mergeable = (right.getCommand() != Opcode.Descend
                         and left.getCommand() == right.getCommand())
            if not mergeable:
                pos += 1
                continue
            leftOld = left.getOldRange()
            leftNew = left.getNewRange()
            rightOld = right.getOldRange()
            rightNew = right.getNewRange()
            left.setOldRange(( leftOld[0], rightOld[1], ))
            left.setNewRange(( leftNew[0], rightNew[1], ))
            subOpcodes[pos - 1:pos + 1] = [ left.asTuple(), ]
        opcode.setSubOpcodes(subOpcodes)

        if len(subOpcodes) == 1:
            onlyCommand = Opcode(subOpcodes[0]).getCommand()
            if onlyCommand == Opcode.Descend:
                propagateUp = False
            elif (onlyCommand == Opcode.Replace
                  and isinstance(oldNode, replaceNotUp)):
                propagateUp = False
                if any(isinstance(oldNode, cls)
                       and isinstance(oldNode.parent, parentCls)
                       for ( cls, parentCls, ) in replaceUpSiblings):
                    # If for instance a section/title would
                    # propagate a replacement up the propagation
                    # needs to be done if all siblings would
                    # also propagate a replacement up
                    replaceUpCandidates.append(index)
            else:
                propagateUp = True
            if propagateUp:
                # Propagate 1-element sequences up
                opcode.setCommand(onlyCommand)
        opcodes[index] = opcode.asTuple()

    if replaceUpCandidates:
        # There are entries which might propagate a replace up if all
        # siblings could do as well
        candidates = set(replaceUpCandidates)
        if all(index in candidates
               or Opcode(rawOpcode).getCommand() == Opcode.Replace
               for ( index, rawOpcode, ) in enumerate(opcodes)):
            # All entries are replacements which may propagate up -
            # actually propagate elements which may propagate
            for index in replaceUpCandidates:
                opcode = Opcode(opcodes[index])
                opcode.setCommand(Opcode.Replace)
                opcodes[index] = opcode.asTuple()
def createDiff(pub, oldTree, newTree):
    """Create and return a diff document from `oldTree` to `newTree`.

    The opcodes leading from `oldTree` to `newTree` are computed and
    cleaned and then used to build the result document. If the
    ``dump_rstdiff`` setting of `pub` is set the input trees and the
    opcodes before and after cleaning are output as debug messages.

    Raises `TypeError` if the cleaned opcodes do not consist of exactly
    one top level opcode or if this opcode is neither a descend nor an
    equal."""
    settings = pub.settings

    # The reporter for the dump is created while the debug setting is
    # temporarily replaced by ``dump_rstdiff``. The real value is
    # restored even if creating the reporter fails so the shared
    # settings are never left modified.
    realDebug = settings.debug
    settings.debug = settings.dump_rstdiff
    try:
        reporter = new_reporter("RSTDIFF", settings)
    finally:
        settings.debug = realDebug
    dispatcher = DocutilsDispatcher(reporter)
    opcodes = doDiff(dispatcher, oldTree, newTree)

    if settings.dump_rstdiff:
        reporter.debug(oldTree.asdom().toprettyxml())
        reporter.debug(newTree.asdom().toprettyxml())
        reporter.debug(pformat(opcodes, 2, 40, None))
        reporter.debug("^^^ Before cleaning vvv After cleaning")

    # Cleaning works in place on `opcodes`
    cleanOpcodes(opcodes, dispatcher, [ oldTree ], [ newTree ])

    if settings.dump_rstdiff:
        reporter.debug(pformat(opcodes, 2, 40, None))

    if len(opcodes) != 1:
        raise TypeError("Don't know how to merge documents which are not rootEq")
    opcode = Opcode(opcodes[0])
    if opcode.getCommand() not in ( Opcode.Descend, Opcode.Equal, ):
        # TODO There should be a sense making message for this case
        # because this may happen due to up propagation of replacements
        raise TypeError("Don't know how to merge top level opcode of type %r"
                        % ( opcode.getCommand(), ))

    diffDoc = buildDocument(oldTree, newTree, settings)
    if opcode.getCommand() == Opcode.Equal:
        # TODO Equality should be reported somehow
        # No difference at all - the result is a copy of the new tree
        diffDoc.extend([ child.deepcopy()
                         for child in newTree.children ])
    else:
        buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree)
    return diffDoc
if __name__ == '__main__':
    pub = processCommandLine()

    # Read both inputs; `useOptions` is called before each read and
    # once more afterwards - presumably to activate the options given
    # for the old input, the new input and both inputs, respectively
    useOptions(pub.settings, oldOption)
    oldTree = readTree(pub, pub.settings._old_source)
    useOptions(pub.settings, newOption)
    newTree = readTree(pub, pub.settings._new_source)
    useOptions(pub.settings, bothOption)

    # Both inputs get the `Text2Words` transform before the diff
    for _tree in ( oldTree, newTree, ):
        Text2Words(_tree).apply()

    diffDoc = createDiff(pub, oldTree, newTree)

    # Post-process the result: first `Words2Text`, then
    # `Generated2Inline`
    for _transform in ( Words2Text, Generated2Inline, ):
        _transform(diffDoc).apply()

    pub.writer.write(diffDoc, pub.destination)
    pub.writer.assemble_parts()
1149 # TODO The CSS classes need to be set in a CSS stylesheet