Debugging.
[docutils/kirr.git] / sandbox / rstdiff / rstdiff.py
blob9826540c86782966fe421ed5421934525e7fd908
1 #!/usr/bin/env python
3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 """
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
23 """
25 __docformat__ = 'reStructuredText'
# Adopt the locale of the user. This is best effort only: a failure must not
# prevent the program from running.
try:
    import locale
    locale.setlocale(locale.LC_ALL, '')
except Exception:
    # A bare ``except:`` would also swallow `KeyboardInterrupt` and
    # `SystemExit` which must propagate
    pass
33 import os, re, sys
35 from pprint import pformat
36 from optparse import SUPPRESS_HELP
38 import docutils
39 from docutils import frontend, writers, nodes, SettingsSpec
40 from docutils.core import Publisher
41 from docutils.utils import SystemMessage, Reporter, new_reporter, new_document
42 from docutils.frontend import OptionParser, make_paths_absolute, validate_boolean
43 from docutils.transforms import Transform
45 from treediff import TreeMatcher, HashableNodeImpl
47 ###############################################################################
48 ###############################################################################
49 # Command line specification
# Text describing the program in the command line help
description = ("""Generates a structural diff from two reStructuredText input
documents and produces an annotated result. """)

# Name and default value of the option selecting the writer
writerOption = 'writer'
writerDefault = 'xml'
# Matches a ``--<writerOption>=<value>`` argument capturing the value
writerArgRE1 = '^--%s=(.*)$' % ( writerOption, )

# Names of the options selecting the input document(s) which the options
# following on the command line apply to
( oldOption, bothOption, newOption, ) = ( 'old', 'both', 'new', )
def switchOptionsCallback(option, opt, value, parser, to):
    """Callback for `optparse`: make the options following on the command
    line apply to input document `to`.

    `option`, `opt` and `value` are part of the `optparse` callback
    signature and are not used here."""
    values = parser.values
    switchOptions(values, to)
def _switchOptionSpec(helpText, target):
    """Return the option specification for the option ``--<target>`` which
    calls `switchOptionsCallback` with `target`."""
    return ( helpText,
             [ '--' + target, ],
             { 'action': 'callback',
               'callback': switchOptionsCallback,
               'callback_args': ( target, ),
               }, )

# Option group, its description and the specifications of its options
settings_spec = (
    'rstdiff options',
    None,
    ( ( 'Select writer to write output with (default "xml").',
        [ '--' + writerOption, ],
        { }, ),
      _switchOptionSpec('Following options apply to the old input document'
                        ' (default: both input documents).',
                        oldOption),
      _switchOptionSpec('Following options apply to the new input document'
                        ' (default: both input documents).',
                        newOption),
      _switchOptionSpec('Following options apply to both input documents'
                        ' (default).',
                        bothOption),
      ( 'Compare sections by comparing their names (default); '
        'useful when section titles are stable but sections change',
        [ '--compare-sections-by-names', ],
        { 'action': 'store_true',
          'default': 1,
          'validator': validate_boolean,
          }, ),
      ( 'Compare sections normally; useful when section titles change',
        [ '--compare-sections-normally', ],
        { 'action': 'store_false',
          'dest': 'compare_sections_by_names',
          }, ),
      ( SUPPRESS_HELP,
        [ '--dump-rstdiff', ],
        { 'action': 'store_true', }, ),
      ),
    )
# Defaults of settings including the default for the writer option
settings_defaults = dict(output_encoding_error_handler='xmlcharrefreplace')
settings_defaults[writerOption] = writerDefault

# Name of the configuration file section
config_section = 'rstdiff'

# Usage message; ``%prog`` is a placeholder replaced by the option parser
usage = '%prog [options]... <old> [<new> [<output>]]'
112 ###############################################################################
113 # Classes for three argument command lines
# Switchable options whose values are handled as lists by `useOptions()`
switchableMultiOptions = (
    'strip_elements_with_classes',
    'strip_classes',
    )

# All options which may be given separately for the old and the new input
# document
switchableOptions = (
    'title',
    'generator',
    'datestamp',
    'source_link',
    'source_url',
    'toc_backlinks',
    'footnote_backlinks',
    'sectnum_xform',
    'doctitle_xform',
    'docinfo_xform',
    'sectsubtitle_xform',
    'strip_comments',
    'input_encoding',
    'input_encoding_error_handler',
    'language_code',
    'pep_references',
    'pep_base_url',
    'pep_file_url_template',
    'rfc_references',
    'rfc_base_url',
    'trim_footnote_reference_space',
    'file_insertion_enabled',
    'raw_enabled',
    'auto_id_prefix',
    'id_prefix',
    ) + switchableMultiOptions
def switchOptions(values, to):
    """Switch `values` so following options apply to input document `to`.

    The switchable options currently set on `values` are moved to the
    dictionary stored for the previously selected input document(s) and the
    options stored for `to` are restored as attributes of `values`. The
    dictionaries are kept in attributes of `values` named ``'_' + <target>``;
    the attribute name of the current target is kept in
    ``values._optionsTo``."""
    lastTo = getattr(values, '_optionsTo', '_' + bothOption)
    lastTarget = getattr(values, lastTo, None)
    if not lastTarget:
        lastTarget = {}
        setattr(values, lastTo, lastTarget)
    targetTo = '_' + to
    target = getattr(values, targetTo, None)
    if not target:
        target = {}
        # Store under the same underscore-prefixed name it is looked up
        # with; storing under plain `to` would leave a stray attribute on
        # `values` and an orphaned dictionary
        setattr(values, targetTo, target)
    for opt in switchableOptions:
        if hasattr(values, opt):
            # Save last option
            lastTarget[opt] = getattr(values, opt)
            delattr(values, opt)
        if opt in target:
            # Restore old option
            setattr(values, opt, target[opt])
    values._optionsTo = targetTo
def useOptions(values, to):
    """Set `values` so the options applying to input document `to` are used.

    Every switchable option is removed from `values` and then set again from
    the options stored for `to` and for both input documents. Single-valued
    options take the first stored value found; multi-valued options collect
    the stored lists of all sources."""
    sources = ( '_' + to, '_' + bothOption, )
    for opt in switchableOptions:
        if hasattr(values, opt):
            delattr(values, opt)
        for src in sources:
            if not hasattr(values, src):
                continue
            stored = getattr(values, src)
            if opt not in stored:
                continue
            if opt not in switchableMultiOptions:
                # The most specific source wins
                setattr(values, opt, stored[opt])
                break
            if not hasattr(values, opt):
                setattr(values, opt, [])
            if stored[opt] is not None:
                getattr(values, opt).extend(stored[opt])
class Publisher3Args(Publisher):
    """A `Publisher` whose option parser is an `OptionParser3Args`."""

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """Create and return the `OptionParser3Args` for this publisher.

        If `config_section` is given it is set on `settings_spec` which is
        created if missing."""
        if config_section:
            settings_spec = settings_spec or SettingsSpec()
            settings_spec.config_section = config_section
            sectionWords = config_section.split()
            if len(sectionWords) > 1 and sectionWords[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        components = ( self.parser, self.reader, self.writer, settings_spec, )
        return OptionParser3Args(components=components,
                                 defaults=defaults, read_config_files=1,
                                 usage=usage, description=description)
class OptionParser3Args(OptionParser):
    """An `OptionParser` accepting up to three positional arguments: the old
    source, the new source and the destination."""

    def check_values(self, values, args):
        """Store positional arguments as runtime settings."""
        # Complete a possible switch
        switchOptions(values, bothOption)
        positional = self.check_args(args)
        ( values._old_source, values._new_source,
          values._destination, ) = positional
        make_paths_absolute(values.__dict__, self.relative_path_settings,
                            os.getcwd())
        values._config_files = self.config_files
        return values

    def check_args(self, args):
        """Check the positional `args` and return a tuple of old source, new
        source and destination.

        An argument of ``-`` or a missing argument results in ``None``
        meaning stdin or stdout, respectively. Problems are reported by
        ``self.error()``. `args` is consumed."""

        def popArg():
            # Take the next argument mapping the stdin / stdout marker
            arg = args.pop(0)
            if arg == '-':
                return None
            return arg

        old_source = new_source = destination = None
        if args:
            old_source = popArg()
        else:
            self.error('At least 1 argument required.')
        if args:
            new_source = popArg()
        if args:
            destination = popArg()
        if args:
            self.error('Maximum 3 arguments allowed.')
        if old_source is None and new_source is None:
            self.error('Old and new source may not both use stdin.')
        clobbered = [ source
                      for source in ( old_source, new_source, )
                      if source and source == destination ]
        if clobbered:
            self.error('Do not specify the same file for both source and '
                       'destination. It will clobber the source file.')
        return old_source, new_source, destination
225 ###############################################################################
226 ###############################################################################
227 # Helpers
class Opcode(object):
    """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`

    An opcode consists of a command, the range in the old list, the range in
    the new list and - for the 'descend' command only - a list of
    sub-opcodes."""

    Replace = 'replace'
    Delete = 'delete'
    Insert = 'insert'
    Equal = 'equal'
    Descend = 'descend'

    # The opcode as a mutable list; set by the constructor
    _tuple = None

    def __init__(self, opcodeTuple):
        """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`"""
        self._tuple = list(opcodeTuple)

    def getCommand(self):
        """Return the command."""
        return self._tuple[0]

    def getOldRange(self):
        """Returns the range pertaining to an old list."""
        return ( self._tuple[1], self._tuple[2], )

    def getNewRange(self):
        """Returns the range pertaining to a new list."""
        return ( self._tuple[3], self._tuple[4], )

    def getSubOpcodes(self):
        """Return the sub-opcodes in case of `command` == 'descend' or
        `None`."""
        if self._tuple[0] != self.Descend:
            return None
        return self._tuple[5]

    def resolveOpcode(self, oldList, newList):
        """Resolves opcode pertaining to `oldList` and `newList`. Returns tuple
        consisting of

        command
          Same as self.getCommand().

        oldRange
          The range of elements in `oldList` affected by the opcode.

        newRange
          The range of elements in `newList` affected by the opcode.

        subOpcodes
          Same as self.getSubOpcodes().
        """
        oldRange = self.getOldRange()
        newRange = self.getNewRange()
        return ( self.getCommand(), oldList[oldRange[0]:oldRange[1]],
                 newList[newRange[0]:newRange[1]], self.getSubOpcodes())

    def setSubOpcodes(self, opcodes):
        """Set the sub-opcodes to a new list.

        Raises `TypeError` unless the command is 'descend'."""
        if self._tuple[0] != self.Descend:
            raise TypeError("Can not set subopcodes of a %r opcode"
                            % ( self._tuple[0], ))
        self._tuple[5] = opcodes

    def setCommand(self, command):
        """Set a new command adapting subopcodes.

        Changing to 'descend' adds an empty list of sub-opcodes, changing to
        any other command drops the sub-opcodes."""
        if self._tuple[0] == command:
            return
        # Rebuild from the five common elements; assigning to index 5 of an
        # opcode without sub-opcodes would raise `IndexError`
        self._tuple = self._tuple[0:5]
        self._tuple[0] = command
        if command == self.Descend:
            self._tuple.append([ ])

    def setOldRange(self, range):
        """Sets the range pertaining to an old list."""
        ( self._tuple[1], self._tuple[2], ) = range

    def setNewRange(self, range):
        """Sets the range pertaining to a new list."""
        ( self._tuple[3], self._tuple[4], ) = range

    def asTuple(self):
        """Return the opcode as a tuple."""
        return tuple(self._tuple)
313 ###############################################################################
314 ###############################################################################
315 # Additional docutils stuff
317 ###############################################################################
318 # Node types
class White(nodes.Text):
    """A text node made up of whitespace only."""

    tagname = '#white'

    # A regular expression matching strings for this class and returning
    # them as the first match.
    # TODO Could be subject to an option
    re = '(\\s+)'
class Word(nodes.Text):
    """A text node holding exactly one word."""

    tagname = '#word'

    @staticmethod
    def splitText(text):
        """Split `text` at whitespace and return a list alternating between
        `Word` and `White` objects.

        NOTE(review): `re.split` returns at least one - possibly empty -
        string so an empty `text` presumably yields a single empty `Word`
        instead of an empty list; confirm against callers."""
        pieces = re.split(White.re, text.astext())
        if not pieces:
            return [ ]
        # The class of the first piece determines the alternation
        if re.match(White.re, pieces[0]):
            classes = ( White, Word, )
        else:
            classes = ( Word, White, )
        return [ classes[index % 2](piece)
                 for ( index, piece, ) in enumerate(pieces) ]
353 ###############################################################################
354 # Transformers
class Text2Words(Transform):
    """Replaces every `Text` node by a sequence of `Word`/`White` nodes."""

    def apply(self):
        """Walk the document with a `Text2WordsVisitor`."""
        document = self.document
        document.walk(Text2WordsVisitor(document))
class Text2WordsVisitor(nodes.SparseNodeVisitor):
    """Visitor splitting `Text` nodes into `Word`/`White` nodes."""

    def visit_Text(self, text):
        """Replace `text` in its parent by the result of splitting it."""
        # An empty result is represented by a single empty `White`
        words = Word.splitText(text) or [ White(''), ]
        text.parent.replace(text, words)
class Words2Text(Transform):
    """Merges sequences of `Word`/`White` nodes into `Text` nodes."""

    def apply(self):
        """Walk the document with a `Words2TextVisitor`."""
        document = self.document
        document.walk(Words2TextVisitor(document))
class Words2TextVisitor(nodes.SparseNodeVisitor):
    """Visitor merging each sequence of adjacent text nodes into a single
    `Text` node."""

    def visit_Text(self, text):
        """Replace the sequence of text nodes ending at `text` by one `Text`.

        Does nothing unless `text` is the last node of its sequence. Raises
        `IndexError` if `text` is not found among the children of its
        parent."""
        parent = text.parent
        # Find this node and the first node of the sequence it belongs to
        first = None
        for i in range(len(parent)):
            if not isinstance(parent[i], nodes.Text):
                first = None
            elif first is None:
                first = i
            # ``parent.index(text)`` uses value equality - can not be
            # used here to find `text`
            if parent[i] is text:
                end = i + 1
                break
        else:
            raise IndexError("Can not find %r in its parent" % ( text, ))

        if (len(parent) > end
            and isinstance(parent[end], nodes.Text)):
            # The visitor processes following children even if they are
            # deleted - so work for last node of a sequence
            return

        # Joining avoids both the repeated concatenation and ``reduce()``
        # which is no builtin in Python 3
        joined = "".join([ node.astext() for node in parent[first:end] ])
        parent[first:end] = ( nodes.Text(joined), )

    visit_White = visit_Text

    visit_Word = visit_Text
410 ###############################################################################
411 ###############################################################################
412 # Hashable
class DocutilsDispatcher(HashableNodeImpl):
    """Implements hashable for a docutils `Node` and supports construction.

    Most operations are dispatched by `dispatchClass()` to a method named
    ``<operation>_<class name of node>`` falling back to
    ``<operation>_UNKNOWN``."""

    # Reporter used for debug output; set by the constructor
    reporter = None

    def __init__(self, reporter):
        """Initialize for docutils nodes using `reporter` for debug output."""
        # Name the class explicitly: ``super(self.__class__, self)`` recurses
        # infinitely as soon as this class is subclassed
        super(DocutilsDispatcher, self).__init__(nodes.Node)
        self.reporter = reporter

    def dispatchClass(self, function, node, *args):
        """Dispatch a call of type `function` for the class of `node` using
        arguments `node` and `args`. Default is to dispatch for imaginary class
        "UNKNOWN".

        The call, its arguments and its result are reported as debug
        messages. Returns the result of the method called."""
        pat = "%s_%%s" % ( function, )
        try:
            name = pat % ( node.__class__.__name__, )
            method = getattr(self, name)
        except AttributeError:
            name = pat % ( 'UNKNOWN', )
            method = getattr(self, name)
        self.reporter.debug("*** %s(%s)"
                            % ( name, ", ".join([ arg.__class__.__name__
                                                  for arg
                                                  in ( node, ) + args ]), ))
        for arg in ( node, ) + args:
            try:
                self.reporter.debug(" > %s" % ( arg, ))
            except UnicodeEncodeError:
                self.reporter.debug(" > CANNOT OUTPUT ARGUMENT OF TYPE %s"
                                    % ( type(arg), ))
        result = method(node, *args)
        try:
            self.reporter.debug(" < %s" % ( result, ))
        except UnicodeEncodeError:
            self.reporter.debug(" < CANNOT OUTPUT RESULT OF TYPE %s"
                                % ( type(result), ))
        return result

    ###########################################################################
    ###########################################################################
    # Implementation of abstract methods for `HashableNodeImpl`

    def rootHash(self, node):
        """Return a hash for the root only. Subclasses must override
        this."""
        return self.dispatchClass('rootHash', node)

    def rootHash_UNKNOWN(self, node):
        """By default hash a root by its class."""
        return hash(node.__class__)

    def rootEq(self, node, other):
        """Returns root equality of `node` and an `other` node. ``True`` if
        the two nodes as roots are equal without considering their
        children. This should be true if one node can be replaced by
        the other and all changes can be represented without changing
        the node itself. Subclasses must override this."""
        # Only nodes of the same class can be equal - this assumption
        # is used in many places
        if node.__class__ != other.__class__:
            return False
        return self.dispatchClass('rootEq', node, other)

    def rootEq_UNKNOWN(self, node, other):
        # Unless we know better two roots of the same type are considered equal
        return True

    def childHash(self, node):
        """Return a hash for the node as a child. Subclasses must override
        this."""
        return self.dispatchClass('childHash', node)

    def childHash_UNKNOWN(self, node):
        # By default compare as a child by comparing children
        return self.childrenHash(node)

    def childEq(self, node, other):
        """Returns equality of `node` and an `other` node as children.
        ``True`` if the child features of the two nodes are equal
        without considering the root. Subclasses must override
        this."""
        # Only nodes of the same class can be equal - this assumption
        # is used in many places
        if node.__class__ != other.__class__:
            return False
        return self.dispatchClass('childEq', node, other)

    def childEq_UNKNOWN(self, node, other):
        # By default compare as a child by comparing children
        return self.childrenEq(node, other)

    def getChildren(self, node):
        """Return the children of `node` as a list. Subclasses must override
        this."""
        return self.dispatchClass('getChildren', node)

    def getChildren_UNKNOWN(self, node):
        """By default the children are the ``children`` of `node`."""
        return node.children

    ###########################################################################
    ###########################################################################
    # Merging

    # TODO The resulting class names should be configurable
    NewDelete = 'removed'
    NewInsert = 'added'
    NewReplaced = 'replaced'
    NewReplacement = 'replacement'

    def copyRoot(self, node):
        """Copy `node` as root and return it."""
        return self.dispatchClass('copyRoot', node)

    def copyRoot_UNKNOWN(self, node):
        """By default copy a root by its ``copy()`` method."""
        return node.copy()

    def addChild(self, root, child):
        """Add `child` to `root`."""
        return self.dispatchClass('addChild', root, child)

    def addChild_UNKNOWN(self, root, child):
        """By default append `child` to `root`."""
        root.append(child)

    def copyChild(self, node, newType):
        """Copy `node` as child and return it. `newType` is ``None`` for an
        unchanged child or the change type."""
        return self.dispatchClass('copyChild', node, newType)

    def copyChild_UNKNOWN(self, node, newType):
        """By default copy a child deeply and mark it with `newType`."""
        return self.setNewType(node.deepcopy(), newType)

    def copyChildren(self, head, tail, root, newType):
        """Return a range of new nodes copied from [ `head` ] + `tail` under
        `root`. `tail` are all the same class as `head`. Nodes are
        created appropriate to type `newType`."""
        return self.dispatchClass('copyChildren', head, tail, root, newType)

    def copyChildren_UNKNOWN(self, head, tail, root, newType):
        """By default copy each of the children on its own."""
        return [ self.copyChild(child, newType)
                 for child in [ head, ] + tail ]

    def copyRange(self, root, children, newType):
        """Return a range of new nodes copied from `children` under `root`.
        Nodes are created appropriate to type `newType`."""
        result = [ ]
        begin = 0
        while begin < len(children):
            first = children[begin]
            # Extend the run while the nodes are of the same class as
            # `first` or both are some kind of text
            end = begin + 1
            while end < len(children):
                last = children[end]
                if not(first.__class__ == last.__class__
                       or (isinstance(first, nodes.Text)
                           and isinstance(last, nodes.Text))):
                    break
                end += 1
            result.extend(self.copyChildren(first, children[begin + 1:end],
                                            root, newType))
            begin = end
        return result

    def mergeChildren(self, diffRoot, oldRoot, newRoot,
                      command, oldRange, newRange):
        """Add children to `diffRoot` merging children `oldRange` / `newRange`
        of `oldRoot` / `newRoot` by `command`.

        Raises `TypeError` for a `command` not handled here."""
        if command == Opcode.Equal:
            for old in oldRange:
                self.addChild(diffRoot, self.copyChild(old, None))
        elif command == Opcode.Insert or command == Opcode.Delete:
            if command == Opcode.Insert:
                srcRoot = newRoot
                srcRange = newRange
                newType = self.NewInsert
            else:
                srcRoot = oldRoot
                srcRange = oldRange
                newType = self.NewDelete
            for newChild in self.copyRange(srcRoot, srcRange, newType):
                self.addChild(diffRoot, newChild)
        elif command == Opcode.Replace:
            # TODO Replacement doubles elements. This needs to be
            # reflected properly in the @ids. If the @ids don't change
            # there need to be unique @ids for replaced elements. This
            # needs also to be reflected in referring @refid and
            # @backrefs.
            for newChild in self.copyRange(oldRoot, oldRange,
                                           self.NewReplaced):
                self.addChild(diffRoot, newChild)
            for newChild in self.copyRange(newRoot, newRange,
                                           self.NewReplacement):
                self.addChild(diffRoot, newChild)
        else:
            raise TypeError("Unhandled command %r" % ( command, ))

    ###########################################################################
    ###########################################################################
    # Helpers

    def setNewType(self, node, newType):
        """Set a class on `node` for `newType` if set. Returns `node`."""
        if newType:
            node['classes'].append("change-%s" % ( newType, ))
        return node

    ###########################################################################
    ###########################################################################
    # Real comparison and merging

    # The idea is like this: Each node has attributes which need to be
    # compared as root and it has attributes which need to be compared
    # as child. This is different for every node type.

    # Similarly each node type may need special methods for cloning
    # and merging.

    ###########################################################################
    # Text / Word / White

    def rootHash_Text(self, node):
        """Hash a text by its content."""
        return hash(node.astext())

    rootHash_Word = rootHash_Text

    def rootHash_White(self, node):
        # Whitespace compares all equal
        return hash('')

    def rootEq_Text(self, node, other):
        """Texts are equal if their content is equal."""
        return node.astext() == other.astext()

    rootEq_Word = rootEq_Text

    def rootEq_White(self, node, other):
        # TODO Must behave different for places where whitespace
        # differences are relevant
        return True

    # Text behaves the same as root or child

    childHash_Text = rootHash_Text
    childHash_Word = rootHash_Word
    childHash_White = rootHash_White

    childEq_Text = rootEq_Text
    childEq_Word = rootEq_Word
    childEq_White = rootEq_White

    def copyChildren_Text(self, head, tail, root, newType):
        """Return a list with one ``inline`` marked with `newType` containing
        `head` and `tail`; the list is empty for a single empty text."""
        if not tail and isinstance(head, nodes.Text) and not head.astext():
            # Do not create empty inlines
            return [ ]
        inline = nodes.inline()
        self.setNewType(inline, newType)
        inline.extend([ head, ] + tail)
        return [ inline, ]

    # Sequences of Text are treated together
    copyChildren_Word = copyChildren_Text
    copyChildren_White = copyChildren_Text

    ###########################################################################
    # section

    def getSectionName(self, node):
        """Return the best name for `node`."""
        if node['dupnames']:
            return node['dupnames'][0]
        if node['names']:
            return node['names'][0]
        if node['ids']:
            return node['ids'][0]
        return '' # No idea...

    def rootEq_section(self, node, other):
        """Compare sections by their names or normally."""
        if node.document.settings.compare_sections_by_names:
            return self.getSectionName(node) == self.getSectionName(other)
        return True

    ###########################################################################
    # For some elements their attributes need to be considered to
    # detect changes.

    def attributeEq(self, node, other, attribute):
        """Return whether `attribute` is either missing in both `node` and
        `other` or present in both with equal values."""
        if (attribute in node) != (attribute in other):
            return False
        if not attribute in node:
            return True
        return node[attribute] == other[attribute]

    ###########################################################################
    # reference

    def rootEq_reference(self, node, other):
        return self.attributeEq(node, other, 'refuri')

    ###########################################################################
    # target

    def rootEq_target(self, node, other):
        return self.attributeEq(node, other, 'refuri')

    ###########################################################################
    # bullet_list

    # TODO This is typically a minor change and should be requested by
    # a special option

    def attributeEq_bullet_list(self, node, other):
        return self.attributeEq(node, other, 'bullet')

    def rootEq_bullet_list(self, node, other):
        return self.attributeEq_bullet_list(node, other)

    def childEq_bullet_list(self, node, other):
        return (self.attributeEq_bullet_list(node, other)
                and self.childrenEq(node, other))

    ###########################################################################
    # enumerated_list

    # TODO This is typically a minor change and should be requested by
    # a special option

    def attributeEq_enumerated_list(self, node, other):
        return (self.attributeEq(node, other, 'enumtype')
                and self.attributeEq(node, other, 'prefix')
                and self.attributeEq(node, other, 'suffix')
                and self.attributeEq(node, other, 'start'))

    def rootEq_enumerated_list(self, node, other):
        return self.attributeEq_enumerated_list(node, other)

    def childEq_enumerated_list(self, node, other):
        return (self.attributeEq_enumerated_list(node, other)
                and self.childrenEq(node, other))

    ###########################################################################
    # image

    def rootEq_image(self, node, other):
        if node.__class__ != other.__class__:
            return False
        return self.attributeEq(node, other, 'uri')

    ###########################################################################
    # Some elements may contain only #PCDATA. They need to propagate
    # changes in their children up to the element itself.

    def rootEqWithChildren(self, node, other):
        """Roots of the same class are equal if their children are equal."""
        if node.__class__ != other.__class__:
            return False
        return self.childrenEq(node, other)

    ###########################################################################
    # comment

    rootEq_comment = rootEqWithChildren

    ###########################################################################
    # literal

    rootEq_literal = rootEqWithChildren

    ###########################################################################
    # option_string

    rootEq_option_string = rootEqWithChildren

    ###########################################################################
    # label

    # TODO This is typically a minor change and should be requested by
    # a special option

    rootEq_label = rootEqWithChildren

    ###########################################################################
    # footnote_reference

    # TODO This is typically a minor change and should be requested by
    # a special option

    rootEq_footnote_reference = rootEqWithChildren

    ###########################################################################
    # citation_reference

    # TODO This is typically a minor change and should be requested by
    # a special option

    rootEq_citation_reference = rootEqWithChildren

    ###########################################################################
    # For some elements their attributes need to be considered to
    # detect changes *and* they may contain only #PCDATA.

    ###########################################################################
    # option_argument

    # TODO This is typically a minor change and should be requested by
    # a special option

    def attributeEq_option_argument(self, node, other):
        return self.attributeEq(node, other, 'delimiter')

    def rootEq_option_argument(self, node, other):
        return (self.attributeEq_option_argument(node, other)
                and self.rootEqWithChildren(node, other))

    def childEq_option_argument(self, node, other):
        return (self.attributeEq_option_argument(node, other)
                and self.childrenEq(node, other))

    ###########################################################################
    # A change in certain elements must propagate the change up since
    # they may occur only once. Must be done by parents.

    # Checks whether `node` and `other` have both a node of type
    # `childClass` and whether the first of those are equal.
    def rootEqWithChild(self, node, other, childClass):
        if node.__class__ != other.__class__:
            return False

        nodeFound = None
        for nodeChild in self.getChildren(node):
            if isinstance(nodeChild, childClass):
                nodeFound = nodeChild
                break

        otherFound = None
        for otherChild in self.getChildren(other):
            if isinstance(otherChild, childClass):
                otherFound = otherChild
                break

        # Without such a child on both sides there is nothing to compare
        if nodeFound is None or otherFound is None:
            return True

        return self.childEq(nodeFound, otherFound)

    ###########################################################################
    # footnote

    def rootEq_footnote(self, node, other):
        return self.rootEqWithChild(node, other, nodes.label)

    ###########################################################################
    # citation

    def rootEq_citation(self, node, other):
        return self.rootEqWithChild(node, other, nodes.label)

    ###########################################################################
    # option

    def rootEq_option(self, node, other):
        return self.rootEqWithChild(node, other, nodes.option_string)

    ###########################################################################
    # Some attributes of some elements depend on their concrete parents.

    # tgroup
    def copyRoot_tgroup(self, node):
        """Copy `node` remembering its number of columns in ``origcols`` and
        resetting ``cols`` so the columns added can be counted."""
        copy = node.copy()
        copy['origcols'] = copy['cols']
        copy['cols'] = 0
        return copy

    def addChild_tgroup(self, root, child):
        """Append `child` to `root` counting the columns added and adapting
        the column widths if the number of columns changed."""
        root.append(child)
        # This works only if for each column there is a `colspec`. Is
        # this the case?
        if isinstance(child, nodes.colspec):
            root['cols'] += 1
        elif isinstance(child, nodes.tbody):
            # All columns seen - check the column widths
            if root['origcols'] != root['cols']:
                for elem in root:
                    if isinstance(elem, nodes.colspec):
                        # Floor division keeps the width an integer
                        # regardless of the Python version
                        elem['colwidth'] = 100 // root['cols']
            del root['origcols']

    # TODO Number of entries must change according to the (changed)
    # number of columns; for added or removed columns entries of *one*
    # column must be added / removed
900 ###############################################################################
901 ###############################################################################
902 # Main
def processCommandLine():
    """Process the command line and return the `Publisher` set up from it."""
    # Determine writer here so options can be given normally; the last
    # matching argument wins
    requested = [ match.group(1)
                  for match in [ re.search(writerArgRE1, arg)
                                 for arg in sys.argv ]
                  if match ]
    if requested:
        preWriter = requested[-1]
    else:
        preWriter = writerDefault

    pub = Publisher3Args()
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_writer(preWriter)

    settingsSpec = SettingsSpec()
    settingsSpec.settings_spec = settings_spec
    settingsSpec.settings_defaults = settings_defaults
    pub.process_command_line(usage=usage, description=description,
                             settings_spec=settingsSpec,
                             config_section=config_section)
    realWriter = pub.settings.writer
    if realWriter != preWriter:
        # The pre-parsing above and the real option parser disagree
        reporter = new_reporter('<cmdline>', pub.settings)
        reporter.severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
                        % ( preWriter, pub.settings.writer, ))
    pub.set_destination()
    return pub
def readTree(pub, sourceName):
    """Read the document `sourceName` using `pub`, apply the transforms and
    return the resulting tree."""
    # Reset reader - just in case it keeps state from a previous invocation
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_source(None, sourceName)
    # Forget a document from a previous invocation before reading
    pub.document = None
    document = pub.reader.read(pub.source, pub.parser, pub.settings)
    pub.document = document
    pub.apply_transforms()
    return pub.document
def doDiff(hashableNodeImpl, oldTree, newTree):
    """Compare `oldTree` to `newTree` using `hashableNodeImpl` and return
    the opcodes necessary to transform `oldTree` to `newTree`."""

    def isWhite(node):
        # Predicate handed to the matcher identifying `White` nodes
        return isinstance(node, White)

    matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree, isWhite)
    return matcher.get_opcodes()
def buildDocument(oldTree, newTree, settings):
    """Return a new, empty document to take the result of converting
    `oldTree` to `newTree`.

    Raises `TypeError` unless both trees are documents."""
    bothDocuments = (isinstance(oldTree, docutils.nodes.document)
                     and isinstance(newTree, docutils.nodes.document))
    if not bothDocuments:
        raise TypeError("Roots of trees must be documents")
    sourcePath = (u"%s => %s"
                  % ( settings._old_source, settings._new_source, ))
    return new_document(sourcePath, settings)
def buildTree(dispatcher, diffRoot, opcodes, oldRoot, newRoot):
    """Populate `diffRoot` with a sub-tree describing how the children
    of `oldRoot` are converted to those of `newRoot` by `opcodes`.

    All tree manipulation is delegated to `dispatcher`.
    """
    oldChildren = dispatcher.getChildren(oldRoot)
    newChildren = dispatcher.getChildren(newRoot)
    for raw in opcodes:
        resolved = Opcode(raw).resolveOpcode(oldChildren, newChildren)
        ( command, oldRange, newRange, subOpcodes, ) = resolved
        if command != Opcode.Descend:
            # Flat opcode - let the dispatcher merge the ranges
            dispatcher.mergeChildren(diffRoot, oldRoot, newRoot,
                                     command, oldRange, newRange)
            continue

        # Descend: copy the old root node and recurse below it
        child = dispatcher.copyRoot(oldRange[0])
        dispatcher.addChild(diffRoot, child)
        buildTree(dispatcher, child,
                  subOpcodes, oldRange[0], newRange[0])
# A replacement in certain elements must not be propagated up since
# they may occur only once and replacement would double them
replaceNotUp = ( nodes.title, nodes.subtitle, nodes.term, nodes.field_name,
                 nodes.attribution, nodes.caption, # (%text.model)
                 nodes.header, nodes.footer, nodes.definition,
                 nodes.field_body, nodes.description, nodes.legend,
                 nodes.entry, # (%body.elements;+) or (%body.elements;*)
                 nodes.decoration, nodes.docinfo, nodes.transition,
                 nodes.option_group, nodes.thead,
                 nodes.tbody, # different content model
                 )
# A replacement in certain elements normally not subject to up
# propagation and contained in certain elements may propagate up if
# all their siblings are also replacements and would propagate up.
# Each entry is a pair of ( element class, parent class, ).
replaceUpSiblings = (
    ( nodes.title, nodes.section, ),
    ( nodes.subtitle, nodes.section, ),
    ( nodes.term, nodes.definition_list_item, ),
    ( nodes.field_name, nodes.field, ),
    ( nodes.attribution, nodes.block_quote, ),
    ( nodes.caption, nodes.figure, ),
    ( nodes.definition, nodes.definition_list_item, ),
    ( nodes.field_body, nodes.field, ),
    ( nodes.description, nodes.option_list_item, ),
    ( nodes.legend, nodes.figure, ),
    ( nodes.option_group, nodes.option_list_item, ),
    )
# TODO If much text is replaced in a text element the whole element
# should be replaced. This makes more sense to people than two large
# replaced/replacement blocks where the only equality is in words like
# "the". The exact meaning of "much" should be an option.
def cleanOpcodes(opcodes, dispatcher, oldList, newList):
    """Tidy up `opcodes` in place; they describe how `newList` is made
    from `oldList`.

    For every opcode with sub-opcodes the sub-opcodes are cleaned
    recursively, neighbouring sub-opcodes of the same non-descend type
    are merged, and a single remaining sub-opcode may have its command
    propagated up to the parent opcode.
    """
    siblingCandidates = [ ]
    for idx, raw in enumerate(opcodes):
        opcode = Opcode(raw)
        ( command, oldRange, newRange, subOpcodes,
          ) = opcode.resolveOpcode(oldList, newList)
        if not subOpcodes:
            # Flat or empty opcodes need no cleaning
            continue

        oldNode = oldRange[0]
        newNode = newRange[0]
        cleanOpcodes(subOpcodes, dispatcher,
                     dispatcher.getChildren(oldNode),
                     dispatcher.getChildren(newNode))

        # Merge adjacent sub-opcodes of the same type (except descends)
        pos = 1
        while pos < len(subOpcodes):
            left = Opcode(subOpcodes[pos - 1])
            right = Opcode(subOpcodes[pos])
            rightCommand = right.getCommand()
            if (rightCommand != Opcode.Descend
                and left.getCommand() == rightCommand):
                oldStart = left.getOldRange()[0]
                newStart = left.getNewRange()[0]
                oldEnd = right.getOldRange()[1]
                newEnd = right.getNewRange()[1]
                left.setOldRange(( oldStart, oldEnd, ))
                left.setNewRange(( newStart, newEnd, ))
                # The merged opcode takes the place of both; `pos` stays
                # so the merged one is compared to its next neighbour
                subOpcodes[pos - 1:pos + 1] = [ left.asTuple(), ]
            else:
                pos += 1
        opcode.setSubOpcodes(subOpcodes)

        if len(subOpcodes) == 1:
            only = Opcode(subOpcodes[0])
            onlyCommand = only.getCommand()
            if onlyCommand == Opcode.Descend:
                propagateUp = False
            elif (onlyCommand == Opcode.Replace
                  and isinstance(oldNode, replaceNotUp)):
                propagateUp = False
                if any(isinstance(oldNode, cls)
                       and isinstance(oldNode.parent, parentCls)
                       for ( cls, parentCls, ) in replaceUpSiblings):
                    # E.g. a section/title must not propagate a
                    # replacement up on its own, but it has to if all
                    # its siblings would propagate one up as well
                    siblingCandidates.append(idx)
            else:
                propagateUp = True
            if propagateUp:
                # A 1-element sequence hands its command to the parent
                opcode.setCommand(onlyCommand)
        opcodes[idx] = opcode.asTuple()

    if siblingCandidates:
        # Some entries may propagate a replace up provided that all
        # their siblings can do so as well
        allReplace = all(idx in siblingCandidates
                         or Opcode(raw).getCommand() == Opcode.Replace
                         for ( idx, raw, ) in enumerate(opcodes))
        if allReplace:
            # Every entry is a replacement or a candidate - turn the
            # candidates into replacements for real
            for idx in siblingCandidates:
                opcode = Opcode(opcodes[idx])
                opcode.setCommand(Opcode.Replace)
                opcodes[idx] = opcode.asTuple()
def createDiff(pub, oldTree, newTree):
    """Create and return a diff document from `oldTree` to `newTree`.

    The opcodes are computed by `doDiff`, tidied by `cleanOpcodes` and
    then turned into a new document. If ``pub.settings.dump_rstdiff``
    is set the input trees and the opcodes before and after cleaning
    are sent to the debug channel of the reporter.

    Raises `TypeError` if there is not exactly one top level opcode or
    if that opcode is neither a descend nor an equal.
    """
    # The reporter is created while `debug` temporarily carries the
    # value of `dump_rstdiff`. The real value is restored in any case -
    # even if creating the reporter fails.
    realDebug = pub.settings.debug
    pub.settings.debug = pub.settings.dump_rstdiff
    try:
        reporter = new_reporter("RSTDIFF", pub.settings)
    finally:
        pub.settings.debug = realDebug
    dispatcher = DocutilsDispatcher(reporter)
    opcodes = doDiff(dispatcher, oldTree, newTree)

    if pub.settings.dump_rstdiff:
        reporter.debug(oldTree.asdom().toprettyxml())
        reporter.debug(newTree.asdom().toprettyxml())
        reporter.debug(pformat(opcodes, 2, 40, None))
        reporter.debug("^^^ Before cleaning vvv After cleaning")

    # Both trees are wrapped in a list so the roots themselves are the
    # elements the top level opcodes refer to
    cleanOpcodes(opcodes, dispatcher, [ oldTree ], [ newTree ])

    if pub.settings.dump_rstdiff:
        reporter.debug(pformat(opcodes, 2, 40, None))

    if len(opcodes) != 1:
        raise TypeError("Don't know how to merge documents which are not rootEq")
    opcode = Opcode(opcodes[0])
    if opcode.getCommand() not in ( Opcode.Descend, Opcode.Equal, ):
        # TODO There should be a sense making message for this case
        # because this may happen due to up propagation of replacements
        raise TypeError("Don't know how to merge top level opcode of type %r"
                        % ( opcode.getCommand(), ))

    diffDoc = buildDocument(oldTree, newTree, pub.settings)
    if opcode.getCommand() == Opcode.Equal:
        # TODO Equality should be reported somehow
        diffDoc.extend([ child.deepcopy()
                         for child in newTree.children ])
    else:
        buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree)
    return diffDoc
if __name__ == '__main__':
    pub = processCommandLine()

    # Read both inputs; `useOptions` is called before each read and
    # once more afterwards (presumably to switch between the option
    # sets for old, new and both - see `useOptions`)
    useOptions(pub.settings, oldOption)
    oldTree = readTree(pub, pub.settings._old_source)
    useOptions(pub.settings, newOption)
    newTree = readTree(pub, pub.settings._new_source)
    useOptions(pub.settings, bothOption)

    # Apply the `Text2Words` transform to both inputs before diffing
    for tree in ( oldTree, newTree, ):
        Text2Words(tree).apply()

    # Diff, then apply the `Words2Text` transform to the result
    diffDoc = createDiff(pub, oldTree, newTree)
    Words2Text(diffDoc).apply()

    # Write the result through the writer of the publisher
    writer = pub.writer
    writer.write(diffDoc, pub.destination)
    writer.assemble_parts()
1135 # TODO The CSS classes need to be set in a CSS stylesheet