3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
25 __docformat__
= 'reStructuredText'
29 locale
.setlocale(locale
.LC_ALL
, '')
35 from pprint
import pformat
36 from optparse
import SUPPRESS_HELP
39 from docutils
import frontend
, writers
, nodes
, SettingsSpec
40 from docutils
.core
import Publisher
41 from docutils
.utils
import SystemMessage
, Reporter
, new_reporter
, new_document
42 from docutils
.frontend
import OptionParser
, make_paths_absolute
, validate_boolean
43 from docutils
.transforms
import Transform
45 from treediff
import TreeMatcher
, HashableNodeImpl
47 ###############################################################################
48 ###############################################################################
49 # Command line specification
51 description
= ("""Generates a structural diff from two reStructuredText input
52 documents and produces an annotated result. """)
54 writerOption
= 'writer'
56 writerArgRE1
= '^--' + writerOption
+ '=' + '(.*)$'
def switchOptionsCallback(option, opt, value, parser, to):
    """`optparse` callback switching the option target to `to`.

    Only `parser` and `to` are used: the values collected so far by
    `parser` are handed to `switchOptions()`. `option`, `opt` and
    `value` are accepted only to satisfy the callback signature.
    """
    collected = parser.values
    switchOptions(collected, to)
69 (('Select writer to write output with (default "xml").',
70 ['--' + writerOption
],
72 ('Following options apply to the old input document'
73 + ' (default: both input documents).',
75 { 'action': 'callback',
76 'callback': switchOptionsCallback
,
77 'callback_args': ( oldOption
, ),
79 ('Following options apply to the new input document'
80 + ' (default: both input documents).',
82 { 'action': 'callback',
83 'callback': switchOptionsCallback
,
84 'callback_args': ( newOption
, ),
86 ('Following options apply to both input documents'
89 { 'action': 'callback',
90 'callback': switchOptionsCallback
,
91 'callback_args': ( bothOption
, ),
93 ('Compare sections by comparing their names (default); '
94 + 'useful when section titles are stable but sections change',
95 ['--compare-sections-by-names'],
96 { 'action': 'store_true',
97 'default': 1, 'validator': validate_boolean
}),
98 ('Compare sections normally; useful when section titles change',
99 ['--compare-sections-normally'],
100 { 'action': 'store_false', 'dest': 'compare_sections_by_names'}),
101 (SUPPRESS_HELP
, ['--dump-rstdiff'], {'action': 'store_true'}),
105 settings_defaults
= {'output_encoding_error_handler': 'xmlcharrefreplace',
106 writerOption
: writerDefault
}
108 config_section
= 'rstdiff'
110 usage
= '%prog [options]... <old> [<new> [<output>]]'
112 ###############################################################################
113 # Classes for three argument command lines
115 switchableMultiOptions
= ( 'strip_elements_with_classes', 'strip_classes', )
116 switchableOptions
= (
117 'title', 'generator', 'datestamp',
118 'source_link', 'source_url',
119 'toc_backlinks', 'footnote_backlinks',
120 'sectnum_xform', 'doctitle_xform', 'docinfo_xform', 'sectsubtitle_xform',
122 'input_encoding', 'input_encoding_error_handler',
124 'pep_references', 'pep_base_url', 'pep_file_url_template',
125 'rfc_references', 'rfc_base_url',
126 'trim_footnote_reference_space',
127 'file_insertion_enabled', 'raw_enabled',
128 'auto_id_prefix', 'id_prefix',
129 ) + switchableMultiOptions
131 def switchOptions(values
, to
):
132 """Switch `values` so following options apply to input document `to`."""
133 lastTo
= getattr(values
, '_optionsTo', '_' + bothOption
)
134 lastTarget
= getattr(values
, lastTo
, None)
137 setattr(values
, lastTo
, lastTarget
)
138 target
= getattr(values
, '_' + to
, None)
141 setattr(values
, to
, target
)
142 for opt
in switchableOptions
:
143 if hasattr(values
, opt
):
145 lastTarget
[opt
] = getattr(values
, opt
)
149 setattr(values
, opt
, target
[opt
])
150 values
._optionsTo
= '_' + to
152 def useOptions(values
, to
):
153 """Set `values` so use options applying to input document `to`."""
154 for opt
in switchableOptions
:
155 if hasattr(values
, opt
):
157 for src
in ( '_' + to
, '_' + bothOption
, ):
158 if hasattr(values
, src
) and opt
in getattr(values
, src
):
159 if opt
in switchableMultiOptions
:
160 if not hasattr(values
, opt
):
161 setattr(values
, opt
, [])
162 if getattr(values
, src
)[opt
] is not None:
163 getattr(values
, opt
).extend(getattr(values
, src
)[opt
])
165 setattr(values
, opt
, getattr(values
, src
)[opt
])
168 class Publisher3Args(Publisher
):
170 def setup_option_parser(self
, usage
=None, description
=None,
171 settings_spec
=None, config_section
=None,
174 if not settings_spec
:
175 settings_spec
= SettingsSpec()
176 settings_spec
.config_section
= config_section
177 parts
= config_section
.split()
178 if len(parts
) > 1 and parts
[-1] == 'application':
179 settings_spec
.config_section_dependencies
= ['applications']
180 #@@@ Add self.source & self.destination to components in future?
181 option_parser
= OptionParser3Args(
182 components
=(self
.parser
, self
.reader
, self
.writer
, settings_spec
),
183 defaults
=defaults
, read_config_files
=1,
184 usage
=usage
, description
=description
)
187 class OptionParser3Args(OptionParser
):
189 def check_values(self
, values
, args
):
190 """Store positional arguments as runtime settings."""
191 # Complete a possible switch
192 switchOptions(values
, bothOption
)
193 values
._old
_source
, values
._new
_source
, values
._destination
= self
.check_args(args
)
194 make_paths_absolute(values
.__dict
__, self
.relative_path_settings
,
196 values
._config
_files
= self
.config_files
199 def check_args(self
, args
):
200 old_source
= new_source
= destination
= None
202 self
.error('At least 1 argument required.')
204 old_source
= args
.pop(0)
205 if old_source
== '-': # means stdin
208 new_source
= args
.pop(0)
209 if new_source
== '-': # means stdin
212 destination
= args
.pop(0)
213 if destination
== '-': # means stdout
216 self
.error('Maximum 3 arguments allowed.')
217 if old_source
is None and new_source
is None:
218 self
.error('Old and new source may not both use stdin.')
219 if (old_source
and old_source
== destination
220 or new_source
and new_source
== destination
):
221 self
.error('Do not specify the same file for both source and '
222 'destination. It will clobber the source file.')
223 return old_source
, new_source
, destination
225 ###############################################################################
226 ###############################################################################
229 class Opcode(object):
230 """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`"""
def __init__(self, opcodeTuple):
    """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`.

    The elements of `opcodeTuple` are copied into a fresh list so they
    can be modified later without touching the original.
    """
    elements = list(opcodeTuple)
    self._tuple = elements
def getCommand(self):
    """Return the command, i.e. the first element of the opcode."""
    command = self._tuple[0]
    return command
def getOldRange(self):
    """Return the range pertaining to an old list as a 2-tuple.

    The tuple is built from elements 1 and 2 of the opcode.
    """
    begin = self._tuple[1]
    end = self._tuple[2]
    return (begin, end)
def getNewRange(self):
    """Return the range pertaining to a new list as a 2-tuple.

    The tuple is built from elements 3 and 4 of the opcode.
    """
    begin = self._tuple[3]
    end = self._tuple[4]
    return (begin, end)
256 def getSubOpcodes(self
):
257 """Return the sub-opcodes in case of `command` == 'descend' or
259 if self
._tuple
[0] != self
.Descend
:
261 return self
._tuple
[5]
def resolveOpcode(self, oldList, newList):
    """Resolve the opcode against `oldList` and `newList`.

    Returns a 4-tuple consisting of

    * the command; same as ``self.getCommand()``,

    * the elements of `oldList` selected by the old range,

    * the elements of `newList` selected by the new range,

    * the sub-opcodes; same as ``self.getSubOpcodes()``.
    """
    ( oldBegin, oldEnd, ) = self.getOldRange()
    ( newBegin, newEnd, ) = self.getNewRange()
    command = self.getCommand()
    oldPart = oldList[oldBegin:oldEnd]
    newPart = newList[newBegin:newEnd]
    return ( command, oldPart, newPart, self.getSubOpcodes(), )
def setSubOpcodes(self, opcodes):
    """Replace the sub-opcodes by the list `opcodes`.

    Raises `TypeError` unless the command of this opcode is
    ``self.Descend``.
    """
    command = self._tuple[0]
    if command == self.Descend:
        self._tuple[5] = opcodes
        return
    raise TypeError("Can not set subopcodes of a %r opcode"
                    % ( command, ))
291 def setCommand(self
, command
):
292 """Set a new command adapting subopcodes."""
293 if self
._tuple
[0] == command
:
295 self
._tuple
[0] = command
296 if command
== self
.Descend
:
299 self
._tuple
= self
._tuple
[0:5]
def setOldRange(self, range):
    """Set the range pertaining to an old list.

    `range` must unpack into exactly two values; they are stored as
    elements 1 and 2 of the opcode.
    """
    begin, end = range
    self._tuple[1] = begin
    self._tuple[2] = end
def setNewRange(self, range):
    """Set the range pertaining to a new list.

    `range` must unpack into exactly two values; they are stored as
    elements 3 and 4 of the opcode.
    """
    begin, end = range
    self._tuple[3] = begin
    self._tuple[4] = end
310 """Return the opcode as a tuple."""
311 return tuple(self
._tuple
)
313 ###############################################################################
314 ###############################################################################
315 # Additional docutils stuff
317 ###############################################################################
320 class White(nodes
.Text
):
321 """A piece of text containing only whitespace."""
325 """A regular expression matching strings for this class and returning
326 them as the first match."""
327 # TODO Could be subject to an option
330 class Word(nodes
.Text
):
331 """A piece of text containing exactly one word."""
337 """Splits text and returns a sequence of `Word` and `White`
338 objects. Returns an empty sequence for an empty `text`."""
340 subs
= re
.split(White
.re
, text
.astext())
344 elif re
.match(White
.re
, subs
[0]):
345 ( current
, next
, ) = ( White
, Word
, )
347 ( current
, next
, ) = ( Word
, White
, )
349 result
.append(current(sub
))
350 ( current
, next
, ) = ( next
, current
, )
353 ###############################################################################
356 class Text2Words(Transform
):
357 """Transforms a `Text` node into a sequence of `Word`/`White`."""
360 self
.document
.walk(Text2WordsVisitor(self
.document
))
362 class Text2WordsVisitor(nodes
.SparseNodeVisitor
):
364 def visit_Text(self
, text
):
365 words
= Word
.splitText(text
)
368 words
= [ White(''), ]
369 text
.parent
.replace(text
, words
)
371 class Words2Text(Transform
):
372 """Transforms a sequence of `Word`/`White` into a `Text` node."""
375 self
.document
.walk(Words2TextVisitor(self
.document
))
377 class Words2TextVisitor(nodes
.SparseNodeVisitor
):
379 def visit_Text(self
, text
):
381 # Find this node and the first node of the sequence it belongs to
383 for i
in range(len(parent
)):
384 if not isinstance(parent
[i
], nodes
.Text
):
388 # ``parent.index(text)`` uses value equality - can not be
389 # used here to find `text`
390 if id(parent
[i
]) == id(text
):
394 raise IndexError("Can not find %r in its parent" % ( text
, ))
396 if (len(parent
) > end
397 and isinstance(parent
[end
], nodes
.Text
)):
398 # The visitor processes following children even if they are
399 # deleted - so work for last node of a sequence
402 texts
= nodes
.Text(reduce(lambda s
, node
: s
+ node
.astext(),
403 parent
[first
:end
], ""))
404 parent
[first
:end
] = ( texts
, )
406 visit_White
= visit_Text
408 visit_Word
= visit_Text
410 class Generated2Inline(Transform
):
411 """Transforms a `generated` node into an `inline` node."""
414 self
.document
.walk(Generated2InlineVisitor(self
.document
))
class Generated2InlineVisitor(nodes.SparseNodeVisitor):
    """Visitor replacing each `generated` node by an `inline` node."""

    def visit_generated(self, generated):
        """Replace `generated` in its parent by an `inline` node.

        The text of the first child becomes the text of the `inline`;
        all remaining children and all attributes are carried over.
        """
        children = generated.children
        # An empty `generated` yields an empty `inline` instead of
        # failing with an `IndexError`
        text = children[0].astext() if children else ''
        # `rawsource` is the first positional parameter of the text
        # element constructor. It must be given explicitly - otherwise
        # the first of the remaining children is taken as `rawsource`
        # (and is lost) and any further child collides with `text`.
        inline = nodes.inline('', text, *children[1:],
                              **generated.attributes)
        generated.parent.replace(generated, inline)
423 ###############################################################################
424 ###############################################################################
427 class DocutilsDispatcher(HashableNodeImpl
):
428 """Implements hashable for a docutils `Node` and supports construction."""
def __init__(self, reporter):
    """Initialize the dispatcher for docutils nodes.

    The base class is initialized with `nodes.Node`. `reporter` is
    stored and used for debug output.
    """
    # Name the class explicitly: ``super(self.__class__, self)`` resolves
    # to this very method again when called on an instance of a subclass
    # and recurses endlessly
    super(DocutilsDispatcher, self).__init__(nodes.Node)
    self.reporter = reporter
436 def dispatchClass(self
, function
, node
, *args
):
437 """Dispatch a call of type `function` for the class of `node` using
438 arguments `node` and `args`. Default is to dispatch for imaginary class
440 pat
= "%s_%%s" % ( function
, )
442 name
= pat
% ( node
.__class
__.__name
__, )
443 method
= getattr(self
, name
)
444 except AttributeError:
445 name
= pat
% ( 'UNKNOWN', )
446 method
= getattr(self
, name
)
447 self
.reporter
.debug("*** %s(%s)"
448 % ( name
, ", ".join([ arg
.__class
__.__name
__
450 in ( node
, ) + args
]), ))
451 for arg
in ( node
, ) + args
:
453 self
.reporter
.debug(" > %s" % ( arg
, ))
454 except UnicodeEncodeError:
455 self
.reporter
.debug(" > CANNOT OUTPUT ARGUMENT OF TYPE %s"
457 result
= method(node
, *args
)
459 self
.reporter
.debug(" < %s" % ( result
, ))
460 except UnicodeEncodeError:
461 self
.reporter
.debug(" < CANNOT OUTPUT RESULT OF TYPE %s"
465 ###########################################################################
466 ###########################################################################
467 # Implementation of abstract methods for `HashableNodeImpl`
def rootHash(self, node):
    """Return a hash for `node` as a root only.

    The computation is delegated to `dispatchClass()` for function
    type ``rootHash``.
    """
    hashValue = self.dispatchClass('rootHash', node)
    return hashValue
def rootHash_UNKNOWN(self, node):
    """Default root hash: the hash of the class of `node`."""
    nodeClass = node.__class__
    return hash(nodeClass)
477 def rootEq(self
, node
, other
):
478 """Returns root equality of `node` and an `other` node. ``True`` if
479 the two nodes as roots are equal without considering their
480 children. This should be true if one node can be replaced by
481 the other and all changes can be represented without changing
482 the node itself. Subclasses must override this."""
483 # Only nodes of the same class can be equal - this assumption
484 # is used in many places
485 if node
.__class
__ != other
.__class
__:
487 return self
.dispatchClass('rootEq', node
, other
)
489 def rootEq_UNKNOWN(self
, node
, other
):
490 # Unless we know better two roots of the same type are considered equal
def childHash(self, node):
    """Return a hash for `node` as a child.

    The computation is delegated to `dispatchClass()` for function
    type ``childHash``.
    """
    hashValue = self.dispatchClass('childHash', node)
    return hashValue
def childHash_UNKNOWN(self, node):
    """Default child hash: the hash over the children of `node`."""
    # Unless a node type says otherwise a node as a child is
    # represented by its children
    childrenHash = self.childrenHash(node)
    return childrenHash
502 def childEq(self
, node
, other
):
503 """Returns equality of `node` and an `other` node as children.
504 ``True`` if the child features of the two nodes are equal
505 without considering the root. Subclasses must override
507 # Only nodes of the same class can be equal - this assumption
508 # is used in many places
509 if node
.__class
__ != other
.__class
__:
511 return self
.dispatchClass('childEq', node
, other
)
def childEq_UNKNOWN(self, node, other):
    """Default child equality: equality of the children of both nodes."""
    # Unless a node type says otherwise a node as a child is
    # represented by its children
    sameChildren = self.childrenEq(node, other)
    return sameChildren
517 def getChildren(self
, node
):
518 """Return the children of `node` as a list. Subclasses must override
520 return self
.dispatchClass('getChildren', node
)
522 def getChildren_UNKNOWN(self
, node
):
525 ###########################################################################
526 ###########################################################################
529 # TODO The resulting class names should be configurable
530 NewDelete
= 'removed'
532 NewReplaced
= 'replaced'
533 NewReplacement
= 'replacement'
def copyRoot(self, node):
    """Return a copy of `node` as a root.

    Delegated to `dispatchClass()` for function type ``copyRoot``.
    """
    rootCopy = self.dispatchClass('copyRoot', node)
    return rootCopy
539 def copyRoot_UNKNOWN(self
, node
):
def addChild(self, root, child):
    """Add `child` to `root`.

    Delegated to `dispatchClass()` for function type ``addChild``
    selected by `root`; the result of the dispatched call is returned.
    """
    outcome = self.dispatchClass('addChild', root, child)
    return outcome
546 def addChild_UNKNOWN(self
, root
, child
):
def copyChild(self, node, newType):
    """Return a copy of `node` as a child.

    `newType` is ``None`` for an unchanged child or the change type.
    Delegated to `dispatchClass()` for function type ``copyChild``.
    """
    childCopy = self.dispatchClass('copyChild', node, newType)
    return childCopy
def copyChild_UNKNOWN(self, node, newType):
    """Default child copy: a deep copy of `node` marked with `newType`.

    Returns whatever `setNewType()` returns for the copy.
    """
    duplicate = node.deepcopy()
    return self.setNewType(duplicate, newType)
def copyChildren(self, head, tail, root, newType):
    """Return a range of new nodes copied from [ `head` ] + `tail` under
    `root`.

    `tail` are all the same class as `head`. Nodes are created
    appropriate to type `newType`. Delegated to `dispatchClass()` for
    function type ``copyChildren`` selected by `head`.
    """
    copies = self.dispatchClass('copyChildren', head, tail, root, newType)
    return copies
def copyChildren_UNKNOWN(self, head, tail, root, newType):
    """Default copy of a sequence: copy `head` and each node of `tail`
    one by one using `copyChild()`.

    `root` is not used here. Returns the list of copies.
    """
    copies = [ ]
    for child in [ head, ] + tail:
        copies.append(self.copyChild(child, newType))
    return copies
567 def copyRange(self
, root
, children
, newType
):
568 """Return a range of new nodes copied from `children` under `root`.
569 Nodes are created appropriate to type `newType`."""
572 while begin
< len(children
):
573 first
= children
[begin
]
575 while end
< len(children
):
577 if not(first
.__class
__ == last
.__class
__
578 or (isinstance(first
, nodes
.Text
)
579 and isinstance(last
, nodes
.Text
))):
582 result
.extend(self
.copyChildren(first
, children
[begin
+ 1:end
],
587 def mergeChildren(self
, diffRoot
, oldRoot
, newRoot
,
588 command
, oldRange
, newRange
):
589 """Add children to `diffRoot` merging children `oldRange` / `newRange`
590 of `oldRoot` / `newRoot` by `command`."""
591 if command
== Opcode
.Equal
:
593 self
.addChild(diffRoot
, self
.copyChild(old
, None))
594 elif command
== Opcode
.Insert
or command
== Opcode
.Delete
:
595 if command
== Opcode
.Insert
:
598 newType
= self
.NewInsert
602 newType
= self
.NewDelete
603 for newChild
in self
.copyRange(srcRoot
, srcRange
, newType
):
604 self
.addChild(diffRoot
, newChild
)
605 elif command
== Opcode
.Replace
:
606 # TODO Replacement doubles elements. This needs to be
607 # reflected properly in the @ids. If the @ids don't change
608 # there need to be unique @ids for replaced elements. This
609 # needs also to be reflected in referring @refid and
611 for newChild
in self
.copyRange(oldRoot
, oldRange
,
613 self
.addChild(diffRoot
, newChild
)
614 for newChild
in self
.copyRange(newRoot
, newRange
,
615 self
.NewReplacement
):
616 self
.addChild(diffRoot
, newChild
)
618 raise TypeError("Unhandled command %r" % ( command
, ))
620 ###########################################################################
621 ###########################################################################
624 def setNewType(self
, node
, newType
):
625 """Set a class on `node` for `newType` if set. Returns `node`."""
627 node
['classes'].append("change-%s" % ( newType
, ))
630 ###########################################################################
631 ###########################################################################
632 # Real comparison and merging
634 # The idea is like this: Each node has attributes which need to be
635 # compared as root and it has attributes which need to be compared
636 # as child. This is different for every node type.
638 # Similarly each node type may need special methods for cloning
641 ###########################################################################
642 # Text / Word / White
def rootHash_Text(self, node):
    """Return the hash of the text content of `node`."""
    content = node.astext()
    return hash(content)
647 rootHash_Word
= rootHash_Text
649 def rootHash_White(self
, node
):
650 # Whitespace compares all equal
def rootEq_Text(self, node, other):
    """Return whether `node` and `other` have the same text content."""
    nodeText = node.astext()
    otherText = other.astext()
    return nodeText == otherText
656 rootEq_Word
= rootEq_Text
658 def rootEq_White(self
, node
, other
):
659 # TODO Must behave different for places where whitespace
660 # differences are relevant
663 # Text behaves the same as root or child
665 childHash_Text
= rootHash_Text
666 childHash_Word
= rootHash_Word
667 childHash_White
= rootHash_White
669 childEq_Text
= rootEq_Text
670 childEq_Word
= rootEq_Word
671 childEq_White
= rootEq_White
673 def copyChildren_Text(self
, head
, tail
, root
, newType
):
674 if not tail
and isinstance(head
, nodes
.Text
) and not head
.astext():
675 # Do not create empty inlines
677 inline
= nodes
.inline()
678 self
.setNewType(inline
, newType
)
679 inline
.extend([ head
, ] + tail
)
682 # Sequences of Text are treated together
683 copyChildren_Word
= copyChildren_Text
684 copyChildren_White
= copyChildren_Text
686 ###########################################################################
689 def getSectionName(self
, node
):
690 """Return the best name for `node`."""
692 return node
['dupnames'][0]
694 return node
['names'][0]
696 return node
['ids'][0]
697 return '' # No idea...
699 def rootEq_section(self
, node
, other
):
700 """Compare sections by their names or normally."""
701 if node
.document
.settings
.compare_sections_by_names
:
702 return self
.getSectionName(node
) == self
.getSectionName(other
)
705 ###########################################################################
706 # For some elements their attributes need to be considered to
709 def attributeEq(self
, node
, other
, attribute
):
710 if (attribute
in node
) != (attribute
in other
):
712 if not attribute
in node
:
714 return node
[attribute
] == other
[attribute
]
716 ###########################################################################
def rootEq_reference(self, node, other):
    """Compare two `reference` nodes as roots by their ``refuri``."""
    sameUri = self.attributeEq(node, other, 'refuri')
    return sameUri
722 ###########################################################################
def rootEq_target(self, node, other):
    """Compare two `target` nodes as roots by their ``refuri``."""
    sameUri = self.attributeEq(node, other, 'refuri')
    return sameUri
728 ###########################################################################
731 # TODO This is typically a minor change and should be requested by
def attributeEq_bullet_list(self, node, other):
    """Compare the ``bullet`` attribute of two `bullet_list` nodes."""
    sameBullet = self.attributeEq(node, other, 'bullet')
    return sameBullet
def rootEq_bullet_list(self, node, other):
    """Compare two `bullet_list` nodes as roots by their attributes."""
    sameAttributes = self.attributeEq_bullet_list(node, other)
    return sameAttributes
def childEq_bullet_list(self, node, other):
    """Compare two `bullet_list` nodes as children.

    The attributes are compared first; the children are compared only
    if the attributes are equal.
    """
    sameAttributes = self.attributeEq_bullet_list(node, other)
    if not sameAttributes:
        return sameAttributes
    return self.childrenEq(node, other)
744 ###########################################################################
747 # TODO This is typically a minor change and should be requested by
def attributeEq_enumerated_list(self, node, other):
    """Compare the attributes of two `enumerated_list` nodes.

    ``enumtype``, ``prefix``, ``suffix`` and ``start`` are compared in
    this order; comparison stops at the first difference.
    """
    outcome = True
    for attribute in ( 'enumtype', 'prefix', 'suffix', 'start', ):
        outcome = self.attributeEq(node, other, attribute)
        if not outcome:
            break
    return outcome
def rootEq_enumerated_list(self, node, other):
    """Compare two `enumerated_list` nodes as roots by their attributes."""
    sameAttributes = self.attributeEq_enumerated_list(node, other)
    return sameAttributes
def childEq_enumerated_list(self, node, other):
    """Compare two `enumerated_list` nodes as children.

    The attributes are compared first; the children are compared only
    if the attributes are equal.
    """
    sameAttributes = self.attributeEq_enumerated_list(node, other)
    if not sameAttributes:
        return sameAttributes
    return self.childrenEq(node, other)
763 ###########################################################################
766 def rootEq_image(self
, node
, other
):
767 if node
.__class
__ != other
.__class
__:
769 return self
.attributeEq(node
, other
, 'uri')
771 ###########################################################################
772 # Some elements may contain only #PCDATA. They need to propagate
773 # changes in their children up to the element itself.
775 def rootEqWithChildren(self
, node
, other
):
776 if node
.__class
__ != other
.__class
__:
778 return self
.childrenEq(node
, other
)
780 ###########################################################################
783 rootEq_comment
= rootEqWithChildren
785 ###########################################################################
788 rootEq_literal
= rootEqWithChildren
790 ###########################################################################
793 rootEq_option_string
= rootEqWithChildren
795 ###########################################################################
798 # TODO This is typically a minor change and should be requested by
801 rootEq_label
= rootEqWithChildren
803 ###########################################################################
806 # TODO This is typically a minor change and should be requested by
809 rootEq_footnote_reference
= rootEqWithChildren
811 ###########################################################################
814 # TODO This is typically a minor change and should be requested by
817 rootEq_citation_reference
= rootEqWithChildren
819 ###########################################################################
820 # For some elements their attributes need to be considered to
821 # detect changes *and* they may contain only #PCDATA.
823 ###########################################################################
826 # TODO This is typically a minor change and should be requested by
def attributeEq_option_argument(self, node, other):
    """Compare the ``delimiter`` attribute of two `option_argument` nodes."""
    sameDelimiter = self.attributeEq(node, other, 'delimiter')
    return sameDelimiter
def rootEq_option_argument(self, node, other):
    """Compare two `option_argument` nodes as roots.

    The attributes are compared first; only if they are equal the nodes
    are compared using `rootEqWithChildren()`.
    """
    sameAttributes = self.attributeEq_option_argument(node, other)
    if not sameAttributes:
        return sameAttributes
    return self.rootEqWithChildren(node, other)
def childEq_option_argument(self, node, other):
    """Compare two `option_argument` nodes as children.

    The attributes are compared first; the children are compared only
    if the attributes are equal.
    """
    sameAttributes = self.attributeEq_option_argument(node, other)
    if not sameAttributes:
        return sameAttributes
    return self.childrenEq(node, other)
840 ###########################################################################
841 # A change in certain elements must propagate the change up since
842 # they may occur only once. Must be done by parents.
844 # Checks whether `node` and `other` have both a node of type
845 # `childClass` and whether the first of those are equal.
846 def rootEqWithChild(self
, node
, other
, childClass
):
847 if node
.__class
__ != other
.__class
__:
851 for nodeChild
in self
.getChildren(node
):
852 if isinstance(nodeChild
, childClass
):
853 nodeFound
= nodeChild
857 for otherChild
in self
.getChildren(other
):
858 if isinstance(otherChild
, childClass
):
859 otherFound
= otherChild
862 if nodeFound
is None or otherFound
is None:
865 return self
.childEq(nodeFound
, otherFound
)
867 ###########################################################################
def rootEq_footnote(self, node, other):
    """Compare two `footnote` nodes as roots by their `label` child.

    Delegated to `rootEqWithChild()` with child class `nodes.label`.
    """
    labelClass = nodes.label
    return self.rootEqWithChild(node, other, labelClass)
873 ###########################################################################
def rootEq_citation(self, node, other):
    """Compare two `citation` nodes as roots by their `label` child.

    Delegated to `rootEqWithChild()` with child class `nodes.label`.
    """
    labelClass = nodes.label
    return self.rootEqWithChild(node, other, labelClass)
879 ###########################################################################
def rootEq_option(self, node, other):
    """Compare two `option` nodes as roots by their `option_string` child.

    Delegated to `rootEqWithChild()` with child class
    `nodes.option_string`.
    """
    optionStringClass = nodes.option_string
    return self.rootEqWithChild(node, other, optionStringClass)
885 ###########################################################################
886 # Some attributes of some elements depend on their concrete parents.
889 def copyRoot_tgroup(self
, node
):
891 copy
['origcols'] = copy
['cols']
895 def addChild_tgroup(self
, root
, child
):
897 # This works only if for each column there is a `colspec`. Is
899 if isinstance(child
, nodes
.colspec
):
901 elif isinstance(child
, nodes
.tbody
):
902 # All columns seen - check the column widths
903 if root
['origcols'] != root
['cols']:
905 if isinstance(elem
, nodes
.colspec
):
906 elem
['colwidth'] = 100 / root
['cols']
909 # TODO Number of entries must change according to the (changed)
910 # number of columns; for added or removed columns entries of *one*
911 # column must be added / removed
913 ###############################################################################
914 ###############################################################################
917 def processCommandLine():
918 """Process command line and return a `Publisher`."""
919 # Determine writer here so options can be given normally
920 preWriter
= writerDefault
922 match
= re
.search(writerArgRE1
, arg
)
924 preWriter
= match
.group(1)
926 pub
= Publisher3Args()
927 pub
.set_reader('standalone', None, 'restructuredtext')
928 pub
.set_writer(preWriter
)
930 settingsSpec
= SettingsSpec()
931 settingsSpec
.settings_spec
= settings_spec
932 settingsSpec
.settings_defaults
= settings_defaults
933 pub
.process_command_line(usage
=usage
, description
=description
,
934 settings_spec
=settingsSpec
,
935 config_section
=config_section
)
936 if pub
.settings
.writer
!= preWriter
:
937 new_reporter('<cmdline>',
938 pub
.settings
).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
939 % ( preWriter
, pub
.settings
.writer
, ))
940 pub
.set_destination()
943 def readTree(pub
, sourceName
):
944 """Read and return a tree from `sourceName`."""
945 # Reset reader - just in case it keeps state from a previous invocation
946 pub
.set_reader('standalone', None, 'restructuredtext')
947 pub
.set_source(None, sourceName
)
949 pub
.document
= pub
.reader
.read(pub
.source
, pub
.parser
, pub
.settings
)
950 pub
.apply_transforms()
def doDiff(hashableNodeImpl, oldTree, newTree):
    """Create a difference from `oldTree` to `newTree` using
    `hashableNodeImpl`.

    Returns the opcodes necessary to transform `oldTree` to `newTree`
    as delivered by `TreeMatcher.get_opcodes()`.
    """
    def isWhite(node):
        # Predicate handed to `TreeMatcher` selecting `White` nodes
        return isinstance(node, White)

    return TreeMatcher(hashableNodeImpl, oldTree, newTree,
                       isWhite).get_opcodes()
961 def buildDocument(oldTree
, newTree
, settings
):
962 """Returns a new document for the result of converting `oldTree` to
964 if (not isinstance(oldTree
, docutils
.nodes
.document
)
965 or not isinstance(newTree
, docutils
.nodes
.document
)):
966 raise TypeError("Roots of trees must be documents")
967 return new_document(u
"%s => %s"
968 % ( settings
._old
_source
, settings
._new
_source
, ),
971 def buildTree(dispatcher
, diffRoot
, opcodes
, oldRoot
, newRoot
):
972 """Adds a new sub-tree under `diffRoot` converting children of
973 `oldRoot` to `newRoot` using `opcodes`."""
974 oldChildren
= dispatcher
.getChildren(oldRoot
)
975 newChildren
= dispatcher
.getChildren(newRoot
)
976 for opcode
in opcodes
:
977 ( command
, oldRange
, newRange
,
978 subOpcodes
, ) = Opcode(opcode
).resolveOpcode(oldChildren
, newChildren
)
979 if command
== Opcode
.Descend
:
980 child
= dispatcher
.copyRoot(oldRange
[0])
981 dispatcher
.addChild(diffRoot
, child
)
982 buildTree(dispatcher
, child
,
983 subOpcodes
, oldRange
[0], newRange
[0])
985 dispatcher
.mergeChildren(diffRoot
, oldRoot
, newRoot
,
986 command
, oldRange
, newRange
)
988 # A replacement in certain elements must not be propagated up since
989 # they may occur only once and replacement would double them
990 replaceNotUp
= ( nodes
.title
, nodes
.subtitle
, nodes
.term
, nodes
.field_name
,
991 nodes
.attribution
, nodes
.caption
, # (%text.model)
992 nodes
.header
, nodes
.footer
, nodes
.definition
,
993 nodes
.field_body
, nodes
.description
, nodes
.legend
,
994 nodes
.entry
, # (%body.elements;+) or (%body.elements;*)
995 nodes
.decoration
, nodes
.docinfo
, nodes
.transition
,
996 nodes
.option_group
, nodes
.thead
,
997 nodes
.tbody
, # different content model
1000 # A replacement in certain elements normally not subject to up
1001 # propagation and contained in certain elements may propagate up if
1002 # all their siblings are also replacements and would propagate up
1003 replaceUpSiblings
= (
1004 ( nodes
.title
, nodes
.section
, ),
1005 ( nodes
.subtitle
, nodes
.section
, ),
1006 ( nodes
.term
, nodes
.definition_list_item
, ),
1007 ( nodes
.field_name
, nodes
.field
, ),
1008 ( nodes
.attribution
, nodes
.block_quote
, ),
1009 ( nodes
.caption
, nodes
.figure
, ),
1010 ( nodes
.definition
, nodes
.definition_list_item
, ),
1011 ( nodes
.field_body
, nodes
.field
, ),
1012 ( nodes
.description
, nodes
.option_list_item
, ),
1013 ( nodes
.legend
, nodes
.figure
, ),
1014 ( nodes
.option_group
, nodes
.option_list_item
, ),
# TODO If much text is replaced in a text element the whole element
# should be replaced. This makes more sense to people than two large
# replaced/replacement blocks where the only equality is in words like
# "the". The exact meaning of "much" should be an option.
def cleanOpcodes(opcodes, dispatcher, oldList, newList):
    """Replace some nasty results in `opcodes` by cleaner versions.

    Opcodes create `newList` from `oldList`. The list `opcodes` (and the
    sub-opcode lists nested in it) is modified in place; nothing is
    returned.

    For every opcode which has sub-opcodes the following is done:

    * The sub-opcodes are cleaned recursively against the children of the
      first old and first new node of the opcode as delivered by
      `dispatcher.getChildren`.

    * Adjacent sub-opcodes carrying the same command are merged into one
      unless the command is `Opcode.Descend`.

    * If exactly one sub-opcode remains its command may be propagated up
      to the opcode itself. A replacement in an element listed in
      `replaceNotUp` is not propagated immediately; if the element/parent
      combination is listed in `replaceUpSiblings` it is propagated after
      all only when every sibling opcode is a replacement, too.
    """
    # Indices of opcodes which turn into a replacement only if all their
    # siblings are (or become) replacements as well
    mightReplaceUpSiblings = [ ]
    for i in range(len(opcodes)):
        opcode = Opcode(opcodes[i])
        ( _, oldRange, newRange, subOpcodes,
          ) = opcode.resolveOpcode(oldList, newList)
        if not subOpcodes:
            # Nothing to clean for flat or empty opcodes
            continue

        oldNode = oldRange[0]
        newNode = newRange[0]
        cleanOpcodes(subOpcodes, dispatcher, dispatcher.getChildren(oldNode),
                     dispatcher.getChildren(newNode))
        j = 1
        while j < len(subOpcodes):
            prev = Opcode(subOpcodes[j - 1])
            this = Opcode(subOpcodes[j])
            if (this.getCommand() != Opcode.Descend
                and prev.getCommand() == this.getCommand()):
                # Merge adjacing opcodes of same type
                prevOld = prev.getOldRange()
                prevNew = prev.getNewRange()
                thisOld = this.getOldRange()
                thisNew = this.getNewRange()
                prev.setOldRange(( prevOld[0], thisOld[1], ))
                prev.setNewRange(( prevNew[0], thisNew[1], ))
                # The merged entry replaces both; `j` stays so the merged
                # entry is compared with its new successor next
                subOpcodes[j - 1:j + 1] = [ prev.asTuple(), ]
            else:
                j += 1
        opcode.setSubOpcodes(subOpcodes)
        if len(subOpcodes) == 1:
            subOpcode = Opcode(subOpcodes[0])
            # NOTE(review): the assignments to `propagateUp` were lost in
            # the damaged source and are reconstructed from the surviving
            # conditions and comments - confirm against upstream
            if subOpcode.getCommand() == Opcode.Descend:
                propagateUp = False
            elif subOpcode.getCommand() == Opcode.Replace:
                if isinstance(oldNode, replaceNotUp):
                    propagateUp = False
                    if any(isinstance(oldNode, cls)
                           and isinstance(oldNode.parent, parentCls)
                           for ( cls, parentCls, ) in replaceUpSiblings):
                        # If for instance a section/title would
                        # propagate a replacement up the propagation
                        # needs to be done if all siblings would
                        # also propagate a replacement up
                        mightReplaceUpSiblings.append(i)
                else:
                    propagateUp = True
            else:
                propagateUp = True
            if propagateUp:
                # Propagate 1-element sequences up
                opcode.setCommand(subOpcode.getCommand())
        opcodes[i] = opcode.asTuple()

    if mightReplaceUpSiblings:
        # There are entries which might propagate a replace up if all
        # siblings could do as well
        candidates = set(mightReplaceUpSiblings)
        if all(k in candidates
               or Opcode(opcodes[k]).getCommand() == Opcode.Replace
               for k in range(len(opcodes))):
            # All entries are replacements which may propagate up -
            # actually propagate elements which may propagate
            for k in mightReplaceUpSiblings:
                opcode = Opcode(opcodes[k])
                opcode.setCommand(Opcode.Replace)
                opcodes[k] = opcode.asTuple()

def createDiff(pub, oldTree, newTree):
    """Create and return a diff document from `oldTree` to `newTree`.

    `pub` is the publisher whose `settings` are used for creating the
    reporter and the result document. If `pub.settings.dump_rstdiff` is
    set both input trees and the opcodes before and after cleaning are
    written as debug messages to the reporter.

    Raises `TypeError` if the diff does not result in exactly one top
    level opcode or if the command of this opcode is neither
    `Opcode.Descend` nor `Opcode.Equal`.
    """
    # While the reporter is created `dump_rstdiff` stands in for `debug`
    # (presumably the reporter picks up its debug flag from the settings -
    # verify against `docutils.utils.new_reporter`). The real value is
    # restored even if creating the reporter fails.
    realDebug = pub.settings.debug
    pub.settings.debug = pub.settings.dump_rstdiff
    try:
        reporter = new_reporter("RSTDIFF", pub.settings)
    finally:
        pub.settings.debug = realDebug
    dispatcher = DocutilsDispatcher(reporter)
    opcodes = doDiff(dispatcher, oldTree, newTree)

    if pub.settings.dump_rstdiff:
        reporter.debug(oldTree.asdom().toprettyxml())
        reporter.debug(newTree.asdom().toprettyxml())
        reporter.debug(pformat(opcodes, 2, 40, None))
        reporter.debug("^^^ Before cleaning vvv After cleaning")

    # Cleaning modifies `opcodes` in place
    cleanOpcodes(opcodes, dispatcher, [ oldTree ], [ newTree ])

    if pub.settings.dump_rstdiff:
        reporter.debug(pformat(opcodes, 2, 40, None))

    if len(opcodes) != 1:
        raise TypeError("Don't know how to merge documents which are not rootEq")
    opcode = Opcode(opcodes[0])
    if opcode.getCommand() not in ( Opcode.Descend, Opcode.Equal, ):
        # TODO There should be a sense making message for this case
        # because this may happen due to up propagation of replacements
        raise TypeError("Don't know how to merge top level opcode of type %r"
                        % ( opcode.getCommand(), ))

    diffDoc = buildDocument(oldTree, newTree, pub.settings)
    if opcode.getCommand() == Opcode.Equal:
        # TODO Equality should be reported somehow
        # Both documents are equal so the result is a copy of the new one
        diffDoc.extend([ child.deepcopy()
                         for child in newTree.children ])
    else:
        buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree)
    return diffDoc

if __name__ == '__main__':
    pub = processCommandLine()

    # Read each input with its own option set selected, then select the
    # option set common to both for the rest of the run
    useOptions(pub.settings, oldOption)
    oldTree = readTree(pub, pub.settings._old_source)
    useOptions(pub.settings, newOption)
    newTree = readTree(pub, pub.settings._new_source)
    useOptions(pub.settings, bothOption)

    # Transform both input trees before they are compared
    Text2Words(oldTree).apply()
    Text2Words(newTree).apply()

    # Create the diff document and post-process it for output
    diffDoc = createDiff(pub, oldTree, newTree)
    Words2Text(diffDoc).apply()
    Generated2Inline(diffDoc).apply()

    pub.writer.write(diffDoc, pub.destination)
    pub.writer.assemble_parts()

# TODO The CSS classes need to be set in a CSS stylesheet