Renamed option `--compare-sections-by-id` to
[docutils/kirr.git] / sandbox / rstdiff / rstdiff.py
blob2981f553601063199094678fd31bd6fbd8b28994
1 #!/usr/bin/env python
3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 """
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
23 """
25 __docformat__ = 'reStructuredText'
# Adopt the locale of the user's environment. This is best effort only:
# a missing module or an unsupported locale must not prevent the tool
# from running, so such failures are ignored on purpose. Only
# `Exception` is caught so interrupts and exit requests still propagate.
try:
    import locale
    locale.setlocale(locale.LC_ALL, '')
except Exception:
    pass
33 import os, re, sys
35 from pprint import pformat
37 import docutils
38 from docutils import frontend, writers, nodes, SettingsSpec
39 from docutils.core import Publisher
40 from docutils.utils import SystemMessage, Reporter, new_reporter, new_document
41 from docutils.frontend import OptionParser, make_paths_absolute, validate_boolean
42 from docutils.transforms import Transform
44 from treediff import TreeMatcher, HashableNodeImpl
46 ###############################################################################
47 ###############################################################################
48 # Command line specification
# Summary paragraph handed to the option parser as program description
description = ("""Generates a structural diff from two reStructuredText input
documents and produces an annotated result. """)

# Name and default value of the option selecting the output writer
writerOption = 'writer'
writerDefault = 'xml'
# Matches the ``--writer=<name>`` form and captures ``<name>``
writerArgRE1 = '^--%s=(.*)$' % ( writerOption, )

# Names of the options switching which input document the options
# following on the command line apply to
( oldOption, bothOption, newOption, ) = ( 'old', 'both', 'new', )
def switchOptionsCallback(option, opt, value, parser, to):
    """Callback for `optparse`: make following options apply to `to`.

    Only `parser` and `to` are used; `option`, `opt` and `value` are
    accepted because `optparse` passes them to every callback.
    """
    values = parser.values
    switchOptions(values, to)
# Option group in the docutils settings specification format: title,
# description and a tuple of ( help text, option strings, keyword
# arguments ) triples.
settings_spec = (
    'rstdiff options',
    None,
    (('Select writer to write output with (default "xml").',
      ['--' + writerOption],
      {}),
     # The following three options only switch which input document
     # subsequent options apply to; see `switchOptionsCallback`
     ('Following options apply to the old input document'
      + ' (default: both input documents).',
      ['--' + oldOption],
      { 'action': 'callback',
        'callback': switchOptionsCallback,
        'callback_args': ( oldOption, ),
        }),
     ('Following options apply to the new input document'
      + ' (default: both input documents).',
      ['--' + newOption],
      { 'action': 'callback',
        'callback': switchOptionsCallback,
        'callback_args': ( newOption, ),
        }),
     ('Following options apply to both input documents'
      + ' (default).',
      ['--' + bothOption],
      { 'action': 'callback',
        'callback': switchOptionsCallback,
        'callback_args': ( bothOption, ),
        }),
     # Both options below write the same setting
     # `compare_sections_by_names`
     ('Compare sections by comparing their names (default); '
      + 'useful when section titles are stable but sections change',
      ['--compare-sections-by-names'],
      { 'action': 'store_true',
        'default': 1, 'validator': validate_boolean}),
     ('Compare sections normally; useful when section titles change',
      ['--compare-sections-normally'],
      { 'action': 'store_false', 'dest': 'compare_sections_by_names'}),
     ),
    )
# Defaults overriding those of the docutils components
settings_defaults = {
    writerOption: writerDefault,
    'output_encoding_error_handler': 'xmlcharrefreplace',
    }

# Name of the configuration file section read for this tool
config_section = 'rstdiff'

# Usage line shown in the command line help
usage = '%prog [options]... <old> [<new> [<output>]]'
110 ###############################################################################
111 # Classes for three argument command lines
# Switchable options whose values are lists
switchableMultiOptions = (
    'strip_elements_with_classes',
    'strip_classes',
    )

# All options which may be given separately for the old and the new
# input document
switchableOptions = (
    'title',
    'generator',
    'datestamp',
    'source_link',
    'source_url',
    'toc_backlinks',
    'footnote_backlinks',
    'sectnum_xform',
    'doctitle_xform',
    'docinfo_xform',
    'sectsubtitle_xform',
    'strip_comments',
    'input_encoding',
    'input_encoding_error_handler',
    'language_code',
    'pep_references',
    'pep_base_url',
    'pep_file_url_template',
    'rfc_references',
    'rfc_base_url',
    'trim_footnote_reference_space',
    'file_insertion_enabled',
    'raw_enabled',
    'auto_id_prefix',
    'id_prefix',
    ) + switchableMultiOptions
def switchOptions(values, to):
    """Switch `values` so following options apply to input document `to`.

    The switchable options currently set on `values` are moved into the
    store of the document selected so far (attribute ``_old``, ``_new``
    or ``_both``; ``_both`` if nothing was selected yet). Options saved
    earlier for `to` are put back on `values`. Finally `to` is recorded
    in ``values._optionsTo`` as the current selection.
    """
    lastTo = getattr(values, '_optionsTo', '_' + bothOption)
    lastTarget = getattr(values, lastTo, None)
    if not lastTarget:
        lastTarget = {}
        setattr(values, lastTo, lastTarget)
    # If nothing has been saved for `to` yet an empty store is used
    # which is deliberately not attached to `values`: attaching it under
    # the plain name `to` would clobber the unrelated attribute of that
    # name, and attaching it under '_' + `to` would replace `lastTarget`
    # when both names coincide, losing the options saved below. The
    # store is created and attached as `lastTarget` by the next switch.
    target = getattr(values, '_' + to, None) or {}
    for opt in switchableOptions:
        if hasattr(values, opt):
            # Save last option
            lastTarget[opt] = getattr(values, opt)
            delattr(values, opt)
        if opt in target:
            # Restore old option
            setattr(values, opt, target[opt])
    values._optionsTo = '_' + to
def useOptions(values, to):
    """Set `values` so use options applying to input document `to`.

    Every switchable option is removed from `values` and set again from
    the stores ``_<to>`` and ``_both``, in this order. A plain option is
    taken from the first store containing it. A list valued option
    collects the entries of every store containing it.
    """
    stores = ( '_' + to, '_' + bothOption, )
    for opt in switchableOptions:
        if hasattr(values, opt):
            delattr(values, opt)
        isMulti = opt in switchableMultiOptions
        for src in stores:
            if not hasattr(values, src):
                continue
            saved = getattr(values, src)
            if opt not in saved:
                continue
            if not isMulti:
                # First store wins for plain options
                setattr(values, opt, saved[opt])
                break
            if not hasattr(values, opt):
                setattr(values, opt, [])
            if saved[opt] is not None:
                getattr(values, opt).extend(saved[opt])
class Publisher3Args(Publisher):
    """A `Publisher` parsing its command line with `OptionParser3Args`."""

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """Create and return the `OptionParser3Args` for this publisher.

        If `config_section` is given it is recorded on `settings_spec`,
        which is created if missing. A section name of several words
        ending in ``application`` additionally depends on the
        ``applications`` section.
        """
        if config_section:
            settings_spec = settings_spec or SettingsSpec()
            settings_spec.config_section = config_section
            words = config_section.split()
            if len(words) > 1 and words[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        components = (self.parser, self.reader, self.writer, settings_spec)
        return OptionParser3Args(
            components=components,
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
class OptionParser3Args(OptionParser):
    """An `OptionParser` accepting the positional arguments
    ``<old> [<new> [<output>]]``."""

    def check_values(self, values, args):
        """Store positional arguments as runtime settings."""
        # Complete a possible switch
        switchOptions(values, bothOption)
        ( oldSource, newSource, destination, ) = self.check_args(args)
        values._old_source = oldSource
        values._new_source = newSource
        values._destination = destination
        make_paths_absolute(values.__dict__, self.relative_path_settings,
                            os.getcwd())
        values._config_files = self.config_files
        return values

    def check_args(self, args):
        """Check the positional `args` and return the tuple ``( old
        source, new source, destination )``.

        `args` is consumed. An argument given as ``-`` or not given at
        all results in ``None``. Problems are reported through
        `self.error()`.
        """
        def take():
            # Next positional argument, ``None`` for ``-`` or no argument
            if not args:
                return None
            arg = args.pop(0)
            if arg == '-':
                return None
            return arg

        if not args:
            self.error('At least 1 argument required.')
        old_source = take() # ``None`` means stdin
        new_source = take() # ``None`` means stdin
        destination = take() # ``None`` means stdout
        if args:
            self.error('Maximum 3 arguments allowed.')
        if old_source is None and new_source is None:
            self.error('Old and new source may not both use stdin.')
        clobbered = [ source
                      for source in ( old_source, new_source, )
                      if source and source == destination ]
        if clobbered:
            self.error('Do not specify the same file for both source and '
                       'destination. It will clobber the source file.')
        return old_source, new_source, destination
223 ###############################################################################
224 ###############################################################################
225 # Helpers
class Opcode(object):
    """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`

    An opcode is a sequence ``( command, oldBegin, oldEnd, newBegin,
    newEnd )``. For the command 'descend' a sixth element holds the
    sub-opcodes."""

    Replace = 'replace'
    Delete = 'delete'
    Insert = 'insert'
    Equal = 'equal'
    Descend = 'descend'

    # The opcode kept as a list so it can be modified
    _tuple = None

    def __init__(self, opcodeTuple):
        """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`"""
        self._tuple = list(opcodeTuple)

    def getCommand(self):
        """Return the command."""
        return self._tuple[0]

    def getOldRange(self):
        """Returns the range pertaining to an old list."""
        return ( self._tuple[1], self._tuple[2], )

    def getNewRange(self):
        """Returns the range pertaining to a new list."""
        return ( self._tuple[3], self._tuple[4], )

    def getSubOpcodes(self):
        """Return the sub-opcodes in case of `command` == 'descend' or
        `None`."""
        if self._tuple[0] != self.Descend:
            return None
        return self._tuple[5]

    def resolveOpcode(self, oldList, newList):
        """Resolves opcode pertaining to `oldList` and `newList`. Returns tuple
        consisting of

        command
          Same as self.getCommand().

        oldRange
          The range of elements in `oldList` affected by the opcode.

        newRange
          The range of elements in `newList` affected by the opcode.

        subOpcodes
          Same as self.getSubOpcodes().
        """
        oldRange = self.getOldRange()
        newRange = self.getNewRange()
        return ( self.getCommand(), oldList[oldRange[0]:oldRange[1]],
                 newList[newRange[0]:newRange[1]], self.getSubOpcodes())

    def setSubOpcodes(self, opcodes):
        """Set the sub-opcodes to a new list. Raises `TypeError` unless the
        command is 'descend'."""
        if self._tuple[0] != self.Descend:
            raise TypeError("Can not set subopcodes of a %r opcode"
                            % ( self._tuple[0], ))
        # Slice assignment also works if the sixth element is missing
        self._tuple[5:] = [ opcodes, ]

    def setCommand(self, command):
        """Set a new command adapting subopcodes."""
        if self._tuple[0] == command:
            return
        self._tuple[0] = command
        if command == self.Descend:
            # Other opcodes have only five elements so the (empty)
            # sub-opcodes must be added by slice assignment - assigning
            # to index 5 would raise `IndexError`
            self._tuple[5:] = [ [ ], ]
        else:
            self._tuple = self._tuple[0:5]

    def setOldRange(self, range):
        """Sets the range pertaining to an old list."""
        ( self._tuple[1], self._tuple[2], ) = range

    def setNewRange(self, range):
        """Sets the range pertaining to a new list."""
        ( self._tuple[3], self._tuple[4], ) = range

    def asTuple(self):
        """Return the opcode as a tuple."""
        return tuple(self._tuple)
311 ###############################################################################
312 ###############################################################################
313 # Additional docutils stuff
315 ###############################################################################
316 # Node types
class White(nodes.Text):
    """A text node consisting of whitespace only."""

    tagname = '#white'

    # A regular expression matching strings for this class and returning
    # them as the first match.
    # TODO Could be subject to an option
    re = '(\\s+)'
class Word(nodes.Text):
    """A piece of text containing exactly one word."""

    tagname = '#word'

    @staticmethod
    def splitText(text):
        """Splits text and returns a sequence of `Word` and `White`
        objects. Returns an empty sequence for an empty `text`.

        `text` must provide ``astext()``."""
        # Splitting by a pattern with a capturing group always returns
        # the text between matches at even positions and the matched
        # whitespace at odd positions. The text between matches is empty
        # at the start / end if the text starts / ends with whitespace
        # and for an empty text - such empty pieces are not words and
        # are dropped.
        subs = re.split(White.re, text.astext())
        result = [ ]
        for ( index, sub, ) in enumerate(subs):
            if not sub:
                continue
            if index % 2:
                result.append(White(sub))
            else:
                result.append(Word(sub))
        return result
351 ###############################################################################
352 # Transformers
class Text2Words(Transform):
    """Replaces each `Text` node of the document by a sequence of
    `Word`/`White` nodes."""

    def apply(self):
        """Walk the whole document with a `Text2WordsVisitor`."""
        visitor = Text2WordsVisitor(self.document)
        self.document.walk(visitor)
class Text2WordsVisitor(nodes.SparseNodeVisitor):
    """Visitor splitting `Text` nodes into `Word`/`White` nodes."""

    def visit_Text(self, text):
        """Replace `text` in its parent by the result of splitting it."""
        # An empty text is represented by a single empty `White`
        pieces = Word.splitText(text) or [ White(''), ]
        text.parent.replace(text, pieces)
class Words2Text(Transform):
    """Joins each sequence of `Word`/`White` nodes of the document into
    a single `Text` node."""

    def apply(self):
        """Walk the whole document with a `Words2TextVisitor`."""
        visitor = Words2TextVisitor(self.document)
        self.document.walk(visitor)
class Words2TextVisitor(nodes.SparseNodeVisitor):
    """Visitor joining sequences of adjacent text nodes (`Text`, `Word`,
    `White`) into a single `Text` node."""

    def visit_Text(self, text):
        """Join the sequence of text nodes ending with `text`.

        Does nothing unless `text` is the last node of its sequence.
        Raises `IndexError` if `text` is not found in its parent."""
        parent = text.parent
        # Find this node and the first node of the sequence it belongs to
        first = None
        for i in range(len(parent)):
            if not isinstance(parent[i], nodes.Text):
                first = None
            elif first is None:
                first = i
            # ``parent.index(text)`` uses value equality - can not be
            # used here to find `text`
            if id(parent[i]) == id(text):
                end = i + 1
                break
        else:
            raise IndexError("Can not find %r in its parent" % ( text, ))

        if (len(parent) > end
            and isinstance(parent[end], nodes.Text)):
            # The visitor processes following children even if they are
            # deleted - so work for last node of a sequence
            return

        # Joining is linear and needs no `reduce` which is no builtin
        # in Python 3
        texts = nodes.Text("".join([ node.astext()
                                     for node in parent[first:end] ]))
        parent[first:end] = ( texts, )

    visit_White = visit_Text

    visit_Word = visit_Text
408 ###############################################################################
409 ###############################################################################
410 # Hashable
class DocutilsDispatcher(HashableNodeImpl):
    """Implements hashable for a docutils `Node` and supports construction.

    Every operation is dispatched by `dispatchClass()` to a method named
    ``<operation>_<class name of the node>``. If there is no such method
    ``<operation>_UNKNOWN`` is used."""

    # Reporter receiving a debug trace of every dispatched call
    reporter = None

    def __init__(self, reporter):
        """Initialize for docutils nodes tracing to `reporter`."""
        # The class must be named explicitly: ``super(self.__class__,
        # self)`` recurses endlessly as soon as this class is subclassed
        super(DocutilsDispatcher, self).__init__(nodes.Node)
        self.reporter = reporter

    def dispatchClass(self, function, node, *args):
        """Dispatch a call of type `function` for the class of `node` using
        arguments `node` and `args`. Default is to dispatch for imaginary class
        "UNKNOWN". The call, its arguments and its result are reported as
        debug messages. Returns the result of the method called."""
        pat = "%s_%%s" % ( function, )
        try:
            name = pat % ( node.__class__.__name__, )
            method = getattr(self, name)
        except AttributeError:
            name = pat % ( 'UNKNOWN', )
            method = getattr(self, name)
        self.reporter.debug("*** %s(%s)"
                            % ( name, ", ".join([ arg.__class__.__name__
                                                  for arg
                                                  in ( node, ) + args ]), ))
        for arg in ( node, ) + args:
            try:
                self.reporter.debug(" > %s" % ( arg, ))
            except UnicodeEncodeError:
                self.reporter.debug(" > CANNOT OUTPUT ARGUMENT OF TYPE %s"
                                    % ( type(arg), ))
        result = method(node, *args)
        try:
            self.reporter.debug(" < %s" % ( result, ))
        except UnicodeEncodeError:
            self.reporter.debug(" < CANNOT OUTPUT RESULT OF TYPE %s"
                                % ( type(result), ))
        return result

    ###########################################################################
    ###########################################################################
    # Implementation of abstract methods for `HashableNodeImpl`

    def rootHash(self, node):
        """Return a hash for the root only. Subclasses must override
        this."""
        return self.dispatchClass('rootHash', node)

    def rootHash_UNKNOWN(self, node):
        """By default a root hashes by its class."""
        return hash(node.__class__)

    def rootEq(self, node, other):
        """Returns root equality of `node` and an `other` node. ``True`` if
        the two nodes as roots are equal without considering their
        children. This should be true if one node can be replaced by
        the other and all changes can be represented without changing
        the node itself. Subclasses must override this."""
        # Only nodes of the same class can be equal - this assumption
        # is used in many places
        if node.__class__ != other.__class__:
            return False
        return self.dispatchClass('rootEq', node, other)

    def rootEq_UNKNOWN(self, node, other):
        """By default roots of the same class are equal."""
        # Unless we know better two roots of the same type are considered equal
        return True

    def childHash(self, node):
        """Return a hash for the node as a child. Subclasses must override
        this."""
        return self.dispatchClass('childHash', node)

    def childHash_UNKNOWN(self, node):
        """By default a child hashes by its children."""
        # By default compare as a child by comparing children
        return self.childrenHash(node)

    def childEq(self, node, other):
        """Returns equality of `node` and an `other` node as children.
        ``True`` if the child features of the two nodes are equal
        without considering the root. Subclasses must override
        this."""
        # Only nodes of the same class can be equal - this assumption
        # is used in many places
        if node.__class__ != other.__class__:
            return False
        return self.dispatchClass('childEq', node, other)

    def childEq_UNKNOWN(self, node, other):
        """By default children are equal if their children are equal."""
        # By default compare as a child by comparing children
        return self.childrenEq(node, other)

    def getChildren(self, node):
        """Return the children of `node` as a list. Subclasses must override
        this."""
        return self.dispatchClass('getChildren', node)

    def getChildren_UNKNOWN(self, node):
        """By default the children are the ``children`` of the node."""
        return node.children

    ###########################################################################
    ###########################################################################
    # Merging

    # Change types; they become part of the class set on changed nodes
    # by `setNewType()`
    # TODO The resulting class names should be configurable
    NewDelete = 'removed'
    NewInsert = 'added'
    NewReplaced = 'replaced'
    NewReplacement = 'replacement'

    def copyRoot(self, node):
        """Copy `node` as root and return it."""
        return self.dispatchClass('copyRoot', node)

    def copyRoot_UNKNOWN(self, node):
        """By default a root is copied by its ``copy()`` method."""
        return node.copy()

    def addChild(self, root, child):
        """Add `child` to `root`."""
        return self.dispatchClass('addChild', root, child)

    def addChild_UNKNOWN(self, root, child):
        """By default a child is appended to the root."""
        root.append(child)

    def copyChild(self, node, newType):
        """Copy `node` as child and return it. `newType` is ``None`` for an
        unchanged child or the change type."""
        return self.dispatchClass('copyChild', node, newType)

    def copyChild_UNKNOWN(self, node, newType):
        """By default a child is deep copied and marked with `newType`."""
        return self.setNewType(node.deepcopy(), newType)

    def copyChildren(self, head, tail, root, newType):
        """Return a range of new nodes copied from [ `head` ] + `tail` under
        `root`. `tail` are all the same class as `head`. Nodes are
        created appropriate to type `newType`."""
        return self.dispatchClass('copyChildren', head, tail, root, newType)

    def copyChildren_UNKNOWN(self, head, tail, root, newType):
        """By default every child is copied by `copyChild()`."""
        return [ self.copyChild(child, newType)
                 for child in [ head, ] + tail ]

    def copyRange(self, root, children, newType):
        """Return a range of new nodes copied from `children` under `root`.
        Nodes are created appropriate to type `newType`."""
        result = [ ]
        begin = 0
        while begin < len(children):
            # Find the end of the run of children which are copied
            # together: All of the same class or all text nodes
            first = children[begin]
            end = begin + 1
            while end < len(children):
                last = children[end]
                if not(first.__class__ == last.__class__
                       or (isinstance(first, nodes.Text)
                           and isinstance(last, nodes.Text))):
                    break
                end += 1
            result.extend(self.copyChildren(first, children[begin + 1:end],
                                            root, newType))
            begin = end
        return result

    def mergeChildren(self, diffRoot, oldRoot, newRoot,
                      command, oldRange, newRange):
        """Add children to `diffRoot` merging children `oldRange` / `newRange`
        of `oldRoot` / `newRoot` by `command`. Raises `TypeError` for a
        `command` which is not handled."""
        if command == Opcode.Equal:
            for old in oldRange:
                self.addChild(diffRoot, self.copyChild(old, None))
        elif command == Opcode.Insert or command == Opcode.Delete:
            if command == Opcode.Insert:
                srcRoot = newRoot
                srcRange = newRange
                newType = self.NewInsert
            else:
                srcRoot = oldRoot
                srcRange = oldRange
                newType = self.NewDelete
            for newChild in self.copyRange(srcRoot, srcRange, newType):
                self.addChild(diffRoot, newChild)
        elif command == Opcode.Replace:
            # TODO Replacement doubles elements. This needs to be
            # reflected properly in the @ids. If the @ids don't change
            # there need to be unique @ids for replaced elements. This
            # needs also to be reflected in referring @refid and
            # @backrefs.
            for newChild in self.copyRange(oldRoot, oldRange,
                                           self.NewReplaced):
                self.addChild(diffRoot, newChild)
            for newChild in self.copyRange(newRoot, newRange,
                                           self.NewReplacement):
                self.addChild(diffRoot, newChild)
        else:
            raise TypeError("Unhandled command %r" % ( command, ))

    ###########################################################################
    ###########################################################################
    # Helpers

    def setNewType(self, node, newType):
        """Set a class on `node` for `newType` if set. Returns `node`."""
        if newType:
            node['classes'].append("change-%s" % ( newType, ))
        return node

    ###########################################################################
    ###########################################################################
    # Real comparison and merging

    # The idea is like this: Each node has attributes which need to be
    # compared as root and it has attributes which need to be compared
    # as child. This is different for every node type.

    # Similarly each node type may need special methods for cloning
    # and merging.

    ###########################################################################
    # Text / Word / White

    def rootHash_Text(self, node):
        """Text hashes by its content."""
        return hash(node.astext())

    rootHash_Word = rootHash_Text

    def rootHash_White(self, node):
        """All whitespace hashes the same."""
        # Whitespace compares all equal
        return hash('')

    def rootEq_Text(self, node, other):
        """Text is equal if the content is equal."""
        return node.astext() == other.astext()

    rootEq_Word = rootEq_Text

    def rootEq_White(self, node, other):
        """All whitespace is equal."""
        # TODO Must behave different for places where whitespace
        # differences are relevant
        return True

    # Text behaves the same as root or child

    childHash_Text = rootHash_Text
    childHash_Word = rootHash_Word
    childHash_White = rootHash_White

    childEq_Text = rootEq_Text
    childEq_Word = rootEq_Word
    childEq_White = rootEq_White

    def copyChildren_Text(self, head, tail, root, newType):
        """Return a list containing a single `inline` marked with `newType`
        and holding `head` and `tail`. The list is empty for a single
        empty text. The nodes are added themselves, not copies."""
        if not tail and isinstance(head, nodes.Text) and not head.astext():
            # Do not create empty inlines
            return [ ]
        inline = nodes.inline()
        self.setNewType(inline, newType)
        inline.extend([ head, ] + tail)
        return [ inline, ]

    # Sequences of Text are treated together
    copyChildren_Word = copyChildren_Text
    copyChildren_White = copyChildren_Text

    ###########################################################################
    # section

    def getSectionName(self, node):
        """Return the name identifying section `node`: The first of its
        ``dupnames``, else the first of its ``names`` unless missing or
        empty, else the first of its ``ids``."""
        if node['dupnames']:
            return node['dupnames'][0]
        # Check for the list being empty first so a section without
        # names falls back to its id instead of raising `IndexError`
        if node['names'] and node['names'][0]:
            return node['names'][0]
        return node['ids'][0]

    def rootEq_section(self, node, other):
        """Compare sections by their names or normally."""
        if node.document.settings.compare_sections_by_names:
            return self.getSectionName(node) == self.getSectionName(other)
        return True

    ###########################################################################
    # For some elements their attributes need to be considered to
    # detect changes.

    def attributeEq(self, node, other, attribute):
        """Return whether `attribute` is missing in both `node` and `other`
        or present in both with equal values."""
        if (attribute in node) != (attribute in other):
            return False
        if not attribute in node:
            return True
        return node[attribute] == other[attribute]

    ###########################################################################
    # reference

    def rootEq_reference(self, node, other):
        """References are equal if their ``refuri`` is equal."""
        return self.attributeEq(node, other, 'refuri')

    ###########################################################################
    # target

    def rootEq_target(self, node, other):
        """Targets are equal if their ``refuri`` is equal."""
        return self.attributeEq(node, other, 'refuri')

    ###########################################################################
    # bullet_list

    # TODO This is typically a minor change and should be requested by
    # a special option

    def attributeEq_bullet_list(self, node, other):
        """Compare the ``bullet`` of two bullet lists."""
        return self.attributeEq(node, other, 'bullet')

    def rootEq_bullet_list(self, node, other):
        return self.attributeEq_bullet_list(node, other)

    def childEq_bullet_list(self, node, other):
        return (self.attributeEq_bullet_list(node, other)
                and self.childrenEq(node, other))

    ###########################################################################
    # enumerated_list

    # TODO This is typically a minor change and should be requested by
    # a special option

    def attributeEq_enumerated_list(self, node, other):
        """Compare ``enumtype``, ``prefix``, ``suffix`` and ``start`` of two
        enumerated lists."""
        return (self.attributeEq(node, other, 'enumtype')
                and self.attributeEq(node, other, 'prefix')
                and self.attributeEq(node, other, 'suffix')
                and self.attributeEq(node, other, 'start'))

    def rootEq_enumerated_list(self, node, other):
        return self.attributeEq_enumerated_list(node, other)

    def childEq_enumerated_list(self, node, other):
        return (self.attributeEq_enumerated_list(node, other)
                and self.childrenEq(node, other))

    ###########################################################################
    # image

    def rootEq_image(self, node, other):
        """Images are equal if their ``uri`` is equal."""
        if node.__class__ != other.__class__:
            return False
        return self.attributeEq(node, other, 'uri')

    ###########################################################################
    # Some elements may contain only #PCDATA. They need to propagate
    # changes in their children up to the element itself.

    def rootEqWithChildren(self, node, other):
        """Return whether `node` and `other` are of the same class and their
        children are equal."""
        if node.__class__ != other.__class__:
            return False
        return self.childrenEq(node, other)

    ###########################################################################
    # comment

    rootEq_comment = rootEqWithChildren

    ###########################################################################
    # literal

    rootEq_literal = rootEqWithChildren

    ###########################################################################
    # option_string

    rootEq_option_string = rootEqWithChildren

    ###########################################################################
    # label

    # TODO This is typically a minor change and should be requested by
    # a special option

    rootEq_label = rootEqWithChildren

    ###########################################################################
    # footnote_reference

    # TODO This is typically a minor change and should be requested by
    # a special option

    rootEq_footnote_reference = rootEqWithChildren

    ###########################################################################
    # citation_reference

    # TODO This is typically a minor change and should be requested by
    # a special option

    rootEq_citation_reference = rootEqWithChildren

    ###########################################################################
    # For some elements their attributes need to be considered to
    # detect changes *and* they may contain only #PCDATA.

    ###########################################################################
    # option_argument

    # TODO This is typically a minor change and should be requested by
    # a special option

    def attributeEq_option_argument(self, node, other):
        """Compare the ``delimiter`` of two option arguments."""
        return self.attributeEq(node, other, 'delimiter')

    def rootEq_option_argument(self, node, other):
        return (self.attributeEq_option_argument(node, other)
                and self.rootEqWithChildren(node, other))

    def childEq_option_argument(self, node, other):
        return (self.attributeEq_option_argument(node, other)
                and self.childrenEq(node, other))

    ###########################################################################
    # A change in certain elements must propagate the change up since
    # they may occur only once. Must be done by parents.

    def rootEqWithChild(self, node, other, childClass):
        """Checks whether `node` and `other` have both a node of type
        `childClass` and whether the first of those are equal as
        children. Returns ``True`` if one of them has no such child and
        ``False`` if `node` and `other` are of different classes."""
        if node.__class__ != other.__class__:
            return False

        nodeFound = None
        for nodeChild in self.getChildren(node):
            if isinstance(nodeChild, childClass):
                nodeFound = nodeChild
                break

        otherFound = None
        for otherChild in self.getChildren(other):
            if isinstance(otherChild, childClass):
                otherFound = otherChild
                break

        if nodeFound is None or otherFound is None:
            return True

        return self.childEq(nodeFound, otherFound)

    ###########################################################################
    # footnote

    def rootEq_footnote(self, node, other):
        return self.rootEqWithChild(node, other, nodes.label)

    ###########################################################################
    # citation

    def rootEq_citation(self, node, other):
        return self.rootEqWithChild(node, other, nodes.label)

    ###########################################################################
    # option

    def rootEq_option(self, node, other):
        return self.rootEqWithChild(node, other, nodes.option_string)

    ###########################################################################
    # Some attributes of some elements depend on their concrete parents.

    # tgroup
    def copyRoot_tgroup(self, node):
        """Copy `node` remembering the original number of columns in
        ``origcols`` and resetting ``cols`` so `addChild_tgroup()` can
        count the columns actually added."""
        copy = node.copy()
        copy['origcols'] = copy['cols']
        copy['cols'] = 0
        return copy

    def addChild_tgroup(self, root, child):
        """Append `child` to `root` counting every `colspec`. When the
        `tbody` is added and the number of columns has changed all columns
        get the same width."""
        root.append(child)
        # This works only if for each column there is a `colspec`. Is
        # this the case?
        if isinstance(child, nodes.colspec):
            root['cols'] += 1
        elif isinstance(child, nodes.tbody):
            # All columns seen - check the column widths
            if root['origcols'] != root['cols']:
                for elem in root:
                    if isinstance(elem, nodes.colspec):
                        # Floor division keeps the width an integer
                        # under Python 3 as well
                        elem['colwidth'] = 100 // root['cols']
            del root['origcols']

    # TODO Number of entries must change according to the (changed)
    # number of columns; for added or removed columns entries of *one*
    # column must be added / removed
895 ###############################################################################
896 ###############################################################################
897 # Main
def processCommandLine():
    """Process command line and return a `Publisher`.

    The writer is determined by scanning `sys.argv` before the command
    line is parsed. If the parsed settings name another writer this is
    reported as a severe internal error."""
    # Determine writer here so options can be given normally
    preWriter = writerDefault
    writerFlag = '--' + writerOption
    argv = sys.argv
    for ( index, arg, ) in enumerate(argv):
        match = re.search(writerArgRE1, arg)
        if match:
            preWriter = match.group(1)
        elif arg == writerFlag and index + 1 < len(argv):
            # The value may also be given as a separate argument; not
            # recognizing this form leads to the mismatch reported below
            preWriter = argv[index + 1]

    pub = Publisher3Args()
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_writer(preWriter)

    settingsSpec = SettingsSpec()
    settingsSpec.settings_spec = settings_spec
    settingsSpec.settings_defaults = settings_defaults
    pub.process_command_line(usage=usage, description=description,
                             settings_spec=settingsSpec,
                             config_section=config_section)
    if pub.settings.writer != preWriter:
        new_reporter('<cmdline>',
                     pub.settings).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
                                          % ( preWriter, pub.settings.writer, ))
    pub.set_destination()
    return pub
def readTree(pub, sourceName):
    """Read the document `sourceName` using publisher `pub`, apply the
    transforms and return the resulting tree."""
    # Use a fresh reader so no state of a previous invocation survives
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_source(None, sourceName)
    pub.document = None
    document = pub.reader.read(pub.source, pub.parser, pub.settings)
    pub.document = document
    pub.apply_transforms()
    return pub.document
def doDiff(hashableNodeImpl, oldTree, newTree):
    """Compare `oldTree` to `newTree` by means of `hashableNodeImpl`.
    Returns the opcodes a `TreeMatcher` computes for transforming
    `oldTree` to `newTree`."""
    def isWhite(node):
        # Predicate handed to the matcher: Is `node` whitespace?
        return isinstance(node, White)

    matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree, isWhite)
    return matcher.get_opcodes()
def buildDocument(oldTree, newTree, settings):
    """Return a new, empty document to hold the result of converting
    `oldTree` to `newTree`. Its source is named after the old and the
    new source given in `settings`. Raises `TypeError` unless both
    trees are documents."""
    bothDocuments = (isinstance(oldTree, docutils.nodes.document)
                     and isinstance(newTree, docutils.nodes.document))
    if not bothDocuments:
        raise TypeError("Roots of trees must be documents")
    sourceName = u"%s => %s" % ( settings._old_source,
                                 settings._new_source, )
    return new_document(sourceName, settings)
def buildTree(dispatcher, diffRoot, opcodes, oldRoot, newRoot):
    """Fill `diffRoot` with the result of applying `opcodes`.

    `opcodes` describe how the children of `oldRoot` are converted to
    the children of `newRoot`. All node access and all modifications of
    the result tree are delegated to `dispatcher`. Nothing is returned;
    `diffRoot` is extended in place.
    """
    oldKids = dispatcher.getChildren(oldRoot)
    newKids = dispatcher.getChildren(newRoot)
    for rawOpcode in opcodes:
        resolved = Opcode(rawOpcode).resolveOpcode(oldKids, newKids)
        ( command, oldRange, newRange, subOpcodes, ) = resolved

        if command != Opcode.Descend:
            # Flat opcode - the dispatcher merges the affected children
            dispatcher.mergeChildren(diffRoot, oldRoot, newRoot,
                                     command, oldRange, newRange)
            continue

        # Descend - copy the old node as root of the sub-tree and
        # recurse into the pair of nodes
        subRoot = dispatcher.copyRoot(oldRange[0])
        dispatcher.addChild(diffRoot, subRoot)
        buildTree(dispatcher, subRoot, subOpcodes, oldRange[0], newRange[0])
# A replacement in certain elements must not be propagated up since
# they may occur only once and replacement would double them.
# `cleanOpcodes` checks the old node against these classes before it
# propagates a replacement up.
replaceNotUp = ( nodes.title, nodes.subtitle, nodes.term, nodes.field_name,
                 nodes.attribution, nodes.caption, # (%text.model)
                 nodes.header, nodes.footer, nodes.definition,
                 nodes.field_body, nodes.description, nodes.legend,
                 nodes.entry, # (%body.elements;+) or (%body.elements;*)
                 nodes.decoration, nodes.docinfo, nodes.transition,
                 nodes.option_group, nodes.thead,
                 nodes.tbody, # different content model
                 )
# A replacement in certain elements normally not subject to up
# propagation and contained in certain elements may propagate up if
# all their siblings are also replacements and would propagate up.
# Each entry is a pair of ( element class, class of its parent, ).
replaceUpSiblings = (
    ( nodes.title, nodes.section, ),
    ( nodes.subtitle, nodes.section, ),
    ( nodes.term, nodes.definition_list_item, ),
    ( nodes.field_name, nodes.field, ),
    ( nodes.attribution, nodes.block_quote, ),
    ( nodes.caption, nodes.figure, ),
    ( nodes.definition, nodes.definition_list_item, ),
    ( nodes.field_body, nodes.field, ),
    ( nodes.description, nodes.option_list_item, ),
    ( nodes.legend, nodes.figure, ),
    ( nodes.option_group, nodes.option_list_item, ),
    )
999 # TODO If much text is replaced in a text element the whole element
1000 # should be replaced. This makes more sense to people than two large
1001 # replaced/replacement blocks where the only equality is in words like
1002 # "the". The exact meaning of "much" should be an option.
def cleanOpcodes(opcodes, dispatcher, oldList, newList):
    """Tidy up `opcodes` in place so the result is nicer to read.

    `opcodes` create `newList` from `oldList`. Three things happen:

    * Sub-opcodes are cleaned recursively.

    * Adjacent sub-opcodes with the same command are merged into one
      unless the command is a descend.

    * If only a single sub-opcode remains its command is propagated up
      to the parent opcode, subject to the restrictions in
      `replaceNotUp` and `replaceUpSiblings`.

    Nothing is returned.
    """
    # Indices of opcodes which must not propagate a replacement up on
    # their own but may do so together with all their siblings
    siblingCandidates = [ ]
    for index, rawOpcode in enumerate(opcodes):
        opcode = Opcode(rawOpcode)
        ( command, oldRange, newRange, subOpcodes,
          ) = opcode.resolveOpcode(oldList, newList)
        if not subOpcodes:
            # Flat or empty opcodes need no cleaning
            continue

        oldNode = oldRange[0]
        newNode = newRange[0]
        cleanOpcodes(subOpcodes, dispatcher, dispatcher.getChildren(oldNode),
                     dispatcher.getChildren(newNode))

        # Merge adjacent sub-opcodes of the same type; the position only
        # advances if nothing was merged since a merge shortens the list
        pos = 1
        while pos < len(subOpcodes):
            left = Opcode(subOpcodes[pos - 1])
            right = Opcode(subOpcodes[pos])
            mergeable = (right.getCommand() != Opcode.Descend
                         and left.getCommand() == right.getCommand())
            if not mergeable:
                pos += 1
                continue
            oldStart = left.getOldRange()[0]
            newStart = left.getNewRange()[0]
            oldEnd = right.getOldRange()[1]
            newEnd = right.getNewRange()[1]
            left.setOldRange(( oldStart, oldEnd, ))
            left.setNewRange(( newStart, newEnd, ))
            subOpcodes[pos - 1:pos + 1] = [ left.asTuple(), ]
        opcode.setSubOpcodes(subOpcodes)

        if len(subOpcodes) == 1:
            only = Opcode(subOpcodes[0])
            onlyCommand = only.getCommand()
            if onlyCommand == Opcode.Descend:
                propagateUp = False
            elif onlyCommand != Opcode.Replace:
                propagateUp = True
            elif not isinstance(oldNode, replaceNotUp):
                propagateUp = True
            else:
                propagateUp = False
                if any(isinstance(oldNode, cls)
                       and isinstance(oldNode.parent, parentCls)
                       for ( cls, parentCls, ) in replaceUpSiblings):
                    # A section/title for instance must not propagate
                    # a replacement up on its own but has to do so if
                    # all its siblings propagate a replacement up, too
                    siblingCandidates.append(index)
            if propagateUp:
                # Propagate 1-element sequences up
                opcode.setCommand(onlyCommand)
        opcodes[index] = opcode.asTuple()

    if not siblingCandidates:
        return

    # Some entries might propagate a replace up - but only if every
    # entry either is such a candidate or already is a replacement
    allReplace = all(index in siblingCandidates
                     or Opcode(rawOpcode).getCommand() == Opcode.Replace
                     for index, rawOpcode in enumerate(opcodes))
    if allReplace:
        for index in siblingCandidates:
            promoted = Opcode(opcodes[index])
            promoted.setCommand(Opcode.Replace)
            opcodes[index] = promoted.asTuple()
def createDiff(pub, oldTree, newTree):
    """Build the document describing the change from `oldTree` to `newTree`.

    The opcodes are computed by `doDiff`, tidied by `cleanOpcodes` and
    then turned into a new document which is returned. With
    ``pub.settings.debug`` set, both trees and the opcodes before and
    after cleaning are sent to the reporter of `oldTree`.

    Raises `TypeError` if cleaning does not leave exactly one top level
    opcode or if that opcode is neither a descend nor an equal.
    """
    settings = pub.settings
    dispatcher = DocutilsDispatcher(new_reporter("DIFF", settings))
    opcodes = doDiff(dispatcher, oldTree, newTree)

    if settings.debug:
        # Potentially expensive - therefore guarded explicitly
        debug = oldTree.reporter.debug
        debug(oldTree.asdom().toprettyxml())
        newTree.reporter.debug(newTree.asdom().toprettyxml())
        debug(pformat(opcodes, 2, 40, None))
        debug("^^^ Before cleaning vvv After cleaning")

    cleanOpcodes(opcodes, dispatcher, [ oldTree ], [ newTree ])

    if settings.debug:
        # Potentially expensive - therefore guarded explicitly
        oldTree.reporter.debug(pformat(opcodes, 2, 40, None))

    if len(opcodes) != 1:
        raise TypeError("Don't know how to merge documents which are not rootEq")
    rootOpcode = Opcode(opcodes[0])
    rootCommand = rootOpcode.getCommand()
    if rootCommand not in ( Opcode.Descend, Opcode.Equal, ):
        # TODO This case deserves a meaningful message because it may
        # be caused by up propagation of replacements
        raise TypeError("Don't know how to merge top level opcode of type %r"
                        % ( rootCommand, ))

    diffDoc = buildDocument(oldTree, newTree, settings)
    if rootCommand == Opcode.Equal:
        # TODO Equality should be reported somehow
        copies = [ child.deepcopy()
                   for child in newTree.children ]
        diffDoc.extend(copies)
        return diffDoc

    buildTree(dispatcher, diffDoc, rootOpcode.getSubOpcodes(),
              oldTree, newTree)
    return diffDoc
if __name__ == '__main__':
    pub = processCommandLine()
    settings = pub.settings

    # `useOptions` is called before each read and once more afterwards;
    # presumably it activates the options meant for the old input, the
    # new input and the combined result respectively - confirm against
    # its definition
    useOptions(settings, oldOption)
    oldTree = readTree(pub, settings._old_source)
    useOptions(settings, newOption)
    newTree = readTree(pub, settings._new_source)
    useOptions(settings, bothOption)

    # Both input trees get the same pre-processing before the diff
    for tree in ( oldTree, newTree, ):
        Text2Words(tree).apply()

    diffDoc = createDiff(pub, oldTree, newTree)
    # Post-process the result with the counterpart of the pre-processing
    Words2Text(diffDoc).apply()

    pub.writer.write(diffDoc, pub.destination)
    pub.writer.assemble_parts()
1128 # TODO The CSS classes need to be set in a CSS stylesheet