3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
25 __docformat__
= 'reStructuredText'
29 locale
.setlocale(locale
.LC_ALL
, '')
36 from docutils
import frontend
, writers
, nodes
, SettingsSpec
37 from docutils
.core
import Publisher
38 from docutils
.utils
import SystemMessage
, Reporter
, new_reporter
, new_document
39 from docutils
.frontend
import OptionParser
, make_paths_absolute
40 from docutils
.transforms
import Transform
42 from treediff
import TreeMatcher
, HashableNodeImpl
44 ###############################################################################
45 ###############################################################################
46 # Command line specification
48 description
= ("""Generates a structural diff from two reStructuredText input
49 documents and produces an annotated result. """)
51 writerOption
= 'writer'
53 writerArgRE1
= '^--' + writerOption
+ '=' + '(.*)$'
58 (('Select writer to write output with (default "xml").',
59 ['--' + writerOption
],
64 settings_defaults
= {'output_encoding_error_handler': 'xmlcharrefreplace',
65 writerOption
: writerDefault
}
67 config_section
= 'rstdiff'
69 usage
= '%prog [options]... <old> [<new> [<output>]]'
71 ###############################################################################
72 # Classes for three argument command lines
74 class Publisher3Args(Publisher
):
76 def setup_option_parser(self
, usage
=None, description
=None,
77 settings_spec
=None, config_section
=None,
81 settings_spec
= SettingsSpec()
82 settings_spec
.config_section
= config_section
83 parts
= config_section
.split()
84 if len(parts
) > 1 and parts
[-1] == 'application':
85 settings_spec
.config_section_dependencies
= ['applications']
86 #@@@ Add self.source & self.destination to components in future?
87 option_parser
= OptionParser3Args(
88 components
=(self
.parser
, self
.reader
, self
.writer
, settings_spec
),
89 defaults
=defaults
, read_config_files
=1,
90 usage
=usage
, description
=description
)
93 class OptionParser3Args(OptionParser
):
95 def check_values(self
, values
, args
):
96 """Store positional arguments as runtime settings."""
97 values
._old
_source
, values
._new
_source
, values
._destination
= self
.check_args(args
)
98 make_paths_absolute(values
.__dict
__, self
.relative_path_settings
,
100 values
._config
_files
= self
.config_files
103 def check_args(self
, args
):
104 old_source
= new_source
= destination
= None
106 self
.error('At least 1 argument required.')
108 old_source
= args
.pop(0)
109 if old_source
== '-': # means stdin
112 new_source
= args
.pop(0)
113 if new_source
== '-': # means stdin
116 destination
= args
.pop(0)
117 if destination
== '-': # means stdout
120 self
.error('Maximum 3 arguments allowed.')
121 if old_source
is None and new_source
is None:
122 self
.error('Old and new source may not both use stdin.')
123 if (old_source
and old_source
== destination
124 or new_source
and new_source
== destination
):
125 self
.error('Do not specify the same file for both source and '
126 'destination. It will clobber the source file.')
127 return old_source
, new_source
, destination
129 ###############################################################################
130 ###############################################################################
133 class Opcode(object):
134 """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`"""
144 def __init__(self
, opcodeTuple
):
145 """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`"""
146 self
._tuple
= list(opcodeTuple
)
148 def getCommand(self
):
149 """Return the command."""
150 return self
._tuple
[0]
152 def getOldRange(self
):
153 """Returns the range pertaining to an old list."""
154 return ( self
._tuple
[1], self
._tuple
[2], )
156 def getNewRange(self
):
157 """Returns the range pertaining to a new list."""
158 return ( self
._tuple
[3], self
._tuple
[4], )
160 def getSubOpcodes(self
):
161 """Return the sub-opcodes in case of `command` == 'descend' or
163 if self
._tuple
[0] != self
.Descend
:
165 return self
._tuple
[5]
167 def resolveOpcode(self
, oldList
, newList
):
168 """Resolves opcode pertaining to `oldList` and `newList`. Returns tuple
172 Same as self.getCommand().
175 The range of elements in `oldList` affected by the opcode.
178 The range of elements in `newList` affected by the opcode.
181 Same as self.getSubOpcodes().
183 oldRange
= self
.getOldRange()
184 newRange
= self
.getNewRange()
185 return ( self
.getCommand(), oldList
[oldRange
[0]:oldRange
[1]],
186 newList
[newRange
[0]:newRange
[1]], self
.getSubOpcodes())
188 def setSubOpcodes(self
, opcodes
):
189 """Set the sub-opcodes to a new list."""
190 if self
._tuple
[0] != self
.Descend
:
191 raise TypeError("Can not set subopcodes of a %r opcode"
192 % ( self
._tuple
[0], ))
193 self
._tuple
[5] = opcodes
195 def setCommand(self
, command
):
196 """Set a new command adapting subopcodes."""
197 if self
._tuple
[0] == command
:
199 self
._tuple
[0] = command
200 if command
== self
.Descend
:
203 self
._tuple
= self
._tuple
[0:5]
205 def setOldRange(self
, range):
206 """Sets the range pertaining to an old list."""
207 ( self
._tuple
[1], self
._tuple
[2], ) = range
209 def setNewRange(self
, range):
210 """Sets the range pertaining to a new list."""
211 ( self
._tuple
[3], self
._tuple
[4], ) = range
214 """Return the opcode as a tuple."""
215 return tuple(self
._tuple
)
217 ###############################################################################
218 ###############################################################################
219 # Additional docutils stuff
221 ###############################################################################
224 class White(nodes
.Text
):
225 """A piece of text containing only whitespace."""
229 """A regular expression matching strings for this class and returning
230 them as the first match."""
231 # TODO Could be subject to an option
234 class Word(nodes
.Text
):
235 """A piece of text containing exactly one word."""
241 """Splits text and returns a sequence of `Word` and `White`
242 objects. Returns an empty sequence for an empty `text`."""
244 subs
= re
.split(White
.re
, text
.astext())
248 elif re
.match(White
.re
, subs
[0]):
249 ( current
, next
, ) = ( White
, Word
, )
251 ( current
, next
, ) = ( Word
, White
, )
253 result
.append(current(sub
))
254 ( current
, next
, ) = ( next
, current
, )
257 ###############################################################################
260 class Text2Words(Transform
):
261 """Transforms a `Text` node into a sequence of `Word`/`White`."""
264 self
.document
.walk(Text2WordsVisitor(self
.document
))
266 class Text2WordsVisitor(nodes
.SparseNodeVisitor
):
268 def visit_Text(self
, text
):
269 words
= Word
.splitText(text
)
272 words
= [ White(''), ]
273 text
.parent
.replace(text
, words
)
275 class Words2Text(Transform
):
276 """Transforms a sequence of `Word`/`White` into a `Text` node."""
279 self
.document
.walk(Words2TextVisitor(self
.document
))
281 class Words2TextVisitor(nodes
.SparseNodeVisitor
):
283 def visit_Text(self
, text
):
285 # Find this node and the first node of the sequence it belongs to
287 for i
in range(len(parent
)):
288 if not isinstance(parent
[i
], nodes
.Text
):
292 # ``parent.index(text)`` uses value equality - can not be
293 # used here to find `text`
294 if id(parent
[i
]) == id(text
):
298 raise IndexError("Can not find %r in its parent" % ( text
, ))
300 if (len(parent
) > end
301 and isinstance(parent
[end
], nodes
.Text
)):
302 # The visitor processes following children even if they are
303 # deleted - so work for last node of a sequence
306 texts
= nodes
.Text(reduce(lambda s
, node
: s
+ node
.astext(),
307 parent
[first
:end
], ""))
308 parent
[first
:end
] = ( texts
, )
310 visit_White
= visit_Text
312 visit_Word
= visit_Text
314 ###############################################################################
315 ###############################################################################
318 class DocutilsDispatcher(HashableNodeImpl
):
319 """Implements hashable for a docutils `Node` and supports construction."""
324 super(self
.__class
__, self
).__init
__(nodes
.Node
)
326 def dispatchClass(self
, function
, node
, *args
):
327 """Dispatch a call of type `function` for the class of `node` using
328 arguments `node` and `args`. Default is to dispatch for imaginary class
330 pat
= "%s_%%s" % ( function
, )
332 name
= pat
% ( node
.__class
__.__name
__, )
333 method
= getattr(self
, name
)
334 except AttributeError:
335 name
= pat
% ( 'UNKNOWN', )
336 method
= getattr(self
, name
)
339 % ( name
, ", ".join([ arg
.__class
__.__name
__
340 for arg
in ( node
, ) + args
]), ))
341 for arg
in ( node
, ) + args
:
342 print(" > %s" % ( arg
, ))
343 result
= method(node
, *args
)
345 print(" < %s" % ( result
, ))
348 ###########################################################################
349 ###########################################################################
350 # Implementation of abstract methods for `HashableNodeImpl`
352 def rootHash(self
, node
):
353 """Return a hash for the root only. Subclasses must override
355 return self
.dispatchClass('rootHash', node
)
357 def rootHash_UNKNOWN(self
, node
):
358 return hash(node
.__class
__)
360 def rootEq(self
, node
, other
):
361 """Returns root equality of `node` and an `other` node. ``True`` if
362 the two nodes as roots are equal without considering their
363 children. This should be true if one node can be replaced by
364 the other and all changes can be represented without changing
365 the node itself. Subclasses must override this."""
366 # Only nodes of the same class can be equal - this assumption
367 # is used in many places
368 if node
.__class
__ != other
.__class
__:
370 return self
.dispatchClass('rootEq', node
, other
)
372 def rootEq_UNKNOWN(self
, node
, other
):
373 # Unless we know better two roots of the same type are considered equal
376 def childHash(self
, node
):
377 """Return a hash for the node as a child. Subclasses must override
379 return self
.dispatchClass('childHash', node
)
381 def childHash_UNKNOWN(self
, node
):
382 # By default compare as a child by comparing children
383 return self
.childrenHash(node
)
385 def childEq(self
, node
, other
):
386 """Returns equality of `node` and an `other` node as children.
387 ``True`` if the child features of the two nodes are equal
388 without considering the root. Subclasses must override
390 # Only nodes of the same class can be equal - this assumption
391 # is used in many places
392 if node
.__class
__ != other
.__class
__:
394 return self
.dispatchClass('childEq', node
, other
)
396 def childEq_UNKNOWN(self
, node
, other
):
397 # By default compare as a child by comparing children
398 return self
.childrenEq(node
, other
)
400 def getChildren(self
, node
):
401 """Return the children of `node` as a list. Subclasses must override
403 return self
.dispatchClass('getChildren', node
)
405 def getChildren_UNKNOWN(self
, node
):
408 ###########################################################################
409 ###########################################################################
412 # TODO The resulting class names should be configurable
413 NewDelete
= 'removed'
415 NewReplaced
= 'replaced'
416 NewReplacement
= 'replacement'
418 def copyRoot(self
, node
):
419 """Copy `node` as root and return it."""
420 return self
.dispatchClass('copyRoot', node
)
422 def copyRoot_UNKNOWN(self
, node
):
425 def addChild(self
, root
, child
):
426 """Add `child` to `root`."""
427 return self
.dispatchClass('addChild', root
, child
)
429 def addChild_UNKNOWN(self
, root
, child
):
432 def copyChild(self
, node
, newType
):
433 """Copy `node` as child and return it. `newType` is ``None`` for an
434 unchanged child or the change type."""
435 return self
.dispatchClass('copyChild', node
, newType
)
437 def copyChild_UNKNOWN(self
, node
, newType
):
438 return self
.setNewType(node
.deepcopy(), newType
)
440 def copyChildren(self
, head
, tail
, root
, newType
):
441 """Return a range of new nodes copied from [ `head` ] + `tail` under
442 `root`. `tail` are all the same class as `head`. Nodes are
443 created approproate to type `newType`."""
444 return self
.dispatchClass('copyChildren', head
, tail
, root
, newType
)
446 def copyChildren_UNKNOWN(self
, head
, tail
, root
, newType
):
447 return [ self
.copyChild(child
, newType
)
448 for child
in [ head
, ] + tail
]
450 def copyRange(self
, root
, children
, newType
):
451 """Return a range of new nodes copied from `children` under `root`.
452 Nodes are created appropriate to type `newType`."""
455 while begin
< len(children
):
456 first
= children
[begin
]
458 while end
< len(children
):
460 if not(first
.__class
__ == last
.__class
__
461 or (isinstance(first
, nodes
.Text
)
462 and isinstance(last
, nodes
.Text
))):
465 result
.extend(self
.copyChildren(first
, children
[begin
+ 1:end
],
470 def mergeChildren(self
, diffRoot
, oldRoot
, newRoot
,
471 command
, oldRange
, newRange
):
472 """Add children to `diffRoot` merging children `oldRange` / `newRange`
473 of `oldRoot` / `newRoot` by `command`."""
474 if command
== Opcode
.Equal
:
476 self
.addChild(diffRoot
, self
.copyChild(old
, None))
477 elif command
== Opcode
.Insert
or command
== Opcode
.Delete
:
478 if command
== Opcode
.Insert
:
481 newType
= self
.NewInsert
485 newType
= self
.NewDelete
486 for newChild
in self
.copyRange(srcRoot
, srcRange
, newType
):
487 self
.addChild(diffRoot
, newChild
)
488 elif command
== Opcode
.Replace
:
489 # TODO Replacement doubles elements. This needs to be
490 # reflected properly in the @ids. If the @ids don't change
491 # there need to be unique @ids for replaced elements. This
492 # needs also to be reflected in referring @refid and
494 for newChild
in self
.copyRange(oldRoot
, oldRange
,
496 self
.addChild(diffRoot
, newChild
)
497 for newChild
in self
.copyRange(newRoot
, newRange
,
498 self
.NewReplacement
):
499 self
.addChild(diffRoot
, newChild
)
501 raise TypeError("Unhandled command %r" % ( command
, ))
503 ###########################################################################
504 ###########################################################################
507 def setNewType(self
, node
, newType
):
508 """Set a class on `node` for `newType` if set. Returns `node`."""
510 node
['classes'].append("change-%s" % ( newType
, ))
513 ###########################################################################
514 ###########################################################################
515 # Real comparison and merging
517 # The idea is like this: Each node has attributes which need to be
518 # compared as root and it has attributes which need to be compared
519 # as child. This is different for every node type.
521 # Similarly each node type may need special methods for cloning
524 ###########################################################################
525 # Text / Word / White
527 def rootHash_Text(self
, node
):
528 return hash(node
.astext())
530 rootHash_Word
= rootHash_Text
532 def rootHash_White(self
, node
):
533 # Whitespace compares all equal
536 def rootEq_Text(self
, node
, other
):
537 return node
.astext() == other
.astext()
539 rootEq_Word
= rootEq_Text
541 def rootEq_White(self
, node
, other
):
542 # TODO Must behave different for places where whitespace
543 # differences are relevant
546 # Text behaves the same as root or child
548 childHash_Text
= rootHash_Text
549 childHash_Word
= rootHash_Word
550 childHash_White
= rootHash_White
552 childEq_Text
= rootEq_Text
553 childEq_Word
= rootEq_Word
554 childEq_White
= rootEq_White
556 def copyChildren_Text(self
, head
, tail
, root
, newType
):
557 if not tail
and isinstance(head
, nodes
.Text
) and not head
.astext():
558 # Do not create empty inlines
560 inline
= nodes
.inline()
561 self
.setNewType(inline
, newType
)
562 inline
.extend([ head
, ] + tail
)
565 # Sequences of Text are treated together
566 copyChildren_Word
= copyChildren_Text
567 copyChildren_White
= copyChildren_Text
569 ###########################################################################
570 # For some elements their attributes need to be considered to
573 def attributeEq(self
, node
, other
, attribute
):
574 if (attribute
in node
) != (attribute
in other
):
576 if not attribute
in node
:
578 return node
[attribute
] == other
[attribute
]
580 ###########################################################################
583 def rootEq_reference(self
, node
, other
):
584 return self
.attributeEq(node
, other
, 'refuri')
586 ###########################################################################
589 def rootEq_target(self
, node
, other
):
590 return self
.attributeEq(node
, other
, 'refuri')
592 ###########################################################################
595 # TODO This is typically a minor change and should be requested by
598 def attributeEq_bullet_list(self
, node
, other
):
599 return self
.attributeEq(node
, other
, 'bullet')
601 def rootEq_bullet_list(self
, node
, other
):
602 return self
.attributeEq_bullet_list(node
, other
)
604 def childEq_bullet_list(self
, node
, other
):
605 return (self
.attributeEq_bullet_list(node
, other
)
606 and self
.childrenEq(node
, other
))
608 ###########################################################################
611 # TODO This is typically a minor change and should be requested by
614 def attributeEq_enumerated_list(self
, node
, other
):
615 return (self
.attributeEq(node
, other
, 'enumtype')
616 and self
.attributeEq(node
, other
, 'prefix')
617 and self
.attributeEq(node
, other
, 'suffix')
618 and self
.attributeEq(node
, other
, 'start'))
620 def rootEq_enumerated_list(self
, node
, other
):
621 return self
.attributeEq_enumerated_list(node
, other
)
623 def childEq_enumerated_list(self
, node
, other
):
624 return (self
.attributeEq_enumerated_list(node
, other
)
625 and self
.childrenEq(node
, other
))
627 ###########################################################################
630 def rootEq_image(self
, node
, other
):
631 if node
.__class
__ != other
.__class
__:
633 return self
.attributeEq(node
, other
, 'uri')
635 ###########################################################################
636 # Some elements may contain only #PCDATA. They need to propagate
637 # changes in their children up to the element itself.
639 def rootEqWithChildren(self
, node
, other
):
640 if node
.__class
__ != other
.__class
__:
642 return self
.childrenEq(node
, other
)
644 ###########################################################################
647 rootEq_comment
= rootEqWithChildren
649 ###########################################################################
652 rootEq_literal
= rootEqWithChildren
654 ###########################################################################
657 rootEq_option_string
= rootEqWithChildren
659 ###########################################################################
662 # TODO This is typically a minor change and should be requested by
665 rootEq_label
= rootEqWithChildren
667 ###########################################################################
670 # TODO This is typically a minor change and should be requested by
673 rootEq_footnote_reference
= rootEqWithChildren
675 ###########################################################################
678 # TODO This is typically a minor change and should be requested by
681 rootEq_citation_reference
= rootEqWithChildren
683 ###########################################################################
684 # For some elements their attributes need to be considered to
685 # detect changes *and* they may contain only #PCDATA.
687 ###########################################################################
690 # TODO This is typically a minor change and should be requested by
693 def attributeEq_option_argument(self
, node
, other
):
694 return self
.attributeEq(node
, other
, 'delimiter')
696 def rootEq_option_argument(self
, node
, other
):
697 return (self
.attributeEq_option_argument(node
, other
)
698 and self
.rootEqWithChildren(node
, other
))
700 def childEq_option_argument(self
, node
, other
):
701 return (self
.attributeEq_option_argument(node
, other
)
702 and self
.childrenEq(node
, other
))
704 ###########################################################################
705 # A change in certain elements must propagate the change up since
706 # they may occur only once. Must be done by parents.
708 # Checks whether `node` and `other` have both a node of type
709 # `childClass` and whether the first of thosee are equal.
710 def rootEqWithChild(self
, node
, other
, childClass
):
711 if node
.__class
__ != other
.__class
__:
715 for nodeChild
in self
.getChildren(node
):
716 if isinstance(nodeChild
, childClass
):
717 nodeFound
= nodeChild
721 for otherChild
in self
.getChildren(other
):
722 if isinstance(otherChild
, childClass
):
723 otherFound
= otherChild
726 if nodeFound
is None or otherFound
is None:
729 return self
.childEq(nodeFound
, otherFound
)
731 ###########################################################################
734 def rootEq_footnote(self
, node
, other
):
735 return self
.rootEqWithChild(node
, other
, nodes
.label
)
737 ###########################################################################
740 def rootEq_citation(self
, node
, other
):
741 return self
.rootEqWithChild(node
, other
, nodes
.label
)
743 ###########################################################################
746 def rootEq_option(self
, node
, other
):
747 return self
.rootEqWithChild(node
, other
, nodes
.option_string
)
749 ###############################################################################
750 ###############################################################################
753 def processCommandLine():
754 """Process command line and return a `Publisher`."""
755 # Determine writer here so options can be given normally
756 preWriter
= writerDefault
758 match
= re
.search(writerArgRE1
, arg
)
760 preWriter
= match
.group(1)
762 pub
= Publisher3Args()
763 pub
.set_reader('standalone', None, 'restructuredtext')
764 pub
.set_writer(preWriter
)
766 settingsSpec
= SettingsSpec()
767 settingsSpec
.settings_spec
= settings_spec
768 settingsSpec
.settings_defaults
= settings_defaults
769 pub
.process_command_line(usage
=usage
, description
=description
,
770 settings_spec
=settingsSpec
,
771 config_section
=config_section
)
772 if pub
.settings
.writer
!= preWriter
:
773 new_reporter('<cmdline>',
774 pub
.settings
).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
775 % ( preWriter
, pub
.settings
.writer
, ))
776 pub
.set_destination()
779 def readTree(pub
, sourceName
):
780 """Read and return a tree from `sourceName`."""
781 # Reset reader - just in case it keeps state from a previous invocation
782 pub
.set_reader('standalone', None, 'restructuredtext')
783 pub
.set_source(None, sourceName
)
785 pub
.document
= pub
.reader
.read(pub
.source
, pub
.parser
, pub
.settings
)
786 pub
.apply_transforms()
789 def doDiff(hashableNodeImpl
, oldTree
, newTree
):
790 """Create a difference from `oldTree` to `newTree` using
791 `hashableNodeImpl`. Returns the opcodes necessary to transform
792 `oldTree` to `newTree`."""
793 matcher
= TreeMatcher(hashableNodeImpl
, oldTree
, newTree
,
794 lambda node
: isinstance(node
, White
))
795 return matcher
.get_opcodes()
797 def buildDocument(oldTree
, newTree
, opcodes
, settings
):
798 """Returns a new document for the result of converting `oldTree` to
799 `newTree` using `opcodes` as description of changes."""
800 if (not isinstance(oldTree
, docutils
.nodes
.document
)
801 or not isinstance(newTree
, docutils
.nodes
.document
)):
802 raise TypeError("Roots of trees must be documents")
803 return new_document(u
"%s => %s"
804 % ( settings
._old
_source
, settings
._new
_source
, ),
807 def buildTree(dispatcher
, diffRoot
, opcodes
, oldRoot
, newRoot
):
808 """Adds a new sub-tree under `diffRoot` converting children of
809 `oldRoot` to `newRoot` using `opcodes`."""
810 oldChildren
= dispatcher
.getChildren(oldRoot
)
811 newChildren
= dispatcher
.getChildren(newRoot
)
812 for opcode
in opcodes
:
813 ( command
, oldRange
, newRange
,
814 subOpcodes
, ) = Opcode(opcode
).resolveOpcode(oldChildren
, newChildren
)
815 if command
== Opcode
.Descend
:
816 child
= dispatcher
.copyRoot(oldRange
[0])
817 dispatcher
.addChild(diffRoot
, child
)
818 buildTree(dispatcher
, child
,
819 subOpcodes
, oldRange
[0], newRange
[0])
821 dispatcher
.mergeChildren(diffRoot
, oldRoot
, newRoot
,
822 command
, oldRange
, newRange
)
824 # A replacement in certain elements must not be propagated up since
825 # they may occur only once and replacement would double them
826 replaceNotUp
= ( nodes
.title
, nodes
.subtitle
, nodes
.term
, nodes
.field_name
,
827 nodes
.attribution
, nodes
.caption
, # (%text.model)
828 nodes
.header
, nodes
.footer
, nodes
.definition
,
829 nodes
.field_body
, nodes
.description
, nodes
.legend
,
830 nodes
.entry
, # (%body.elements;+) or (%body.elements;*)
831 nodes
.decoration
, nodes
.docinfo
, nodes
.transition
,
832 nodes
.option_group
, nodes
.thead
,
833 nodes
.tbody
, # different content model
836 # A replacement in certain elements normally not subject to up
837 # propagation and contained in certain elements may propagate up if
838 # all their siblings are also replacements and would propagate up
839 replaceUpSiblings
= (
840 ( nodes
.title
, nodes
.section
, ),
841 ( nodes
.subtitle
, nodes
.section
, ),
842 ( nodes
.term
, nodes
.definition_list_item
, ),
843 ( nodes
.field_name
, nodes
.field
, ),
844 ( nodes
.attribution
, nodes
.block_quote
, ),
845 ( nodes
.caption
, nodes
.figure
, ),
846 ( nodes
.definition
, nodes
.definition_list_item
, ),
847 ( nodes
.field_body
, nodes
.field
, ),
848 ( nodes
.description
, nodes
.option_list_item
, ),
849 ( nodes
.legend
, nodes
.figure
, ),
850 ( nodes
.option_group
, nodes
.option_list_item
, ),
853 # TODO If much text is replaced in a text element the whole element
854 # should be replaced. This makes more sense to people than two large
855 # replaced/replacement blocks where the only equality is in words like
856 # "the". The exact meaning of "much" should be an option.
857 def cleanOpcodes(opcodes
, dispatcher
, oldList
, newList
):
858 """Replace some nasty results in `opcodes` by cleaner versions. Opcodes
859 create `newList` from `oldList`."""
860 mightReplaceUpSiblings
= [ ]
861 for i
in range(len(opcodes
)):
862 opcode
= Opcode(opcodes
[i
])
863 ( command
, oldRange
, newRange
, subOpcodes
,
864 ) = opcode
.resolveOpcode(oldList
, newList
)
866 # Nothing to clean for flat or empty opcodes
869 oldNode
= oldRange
[0]
870 newNode
= newRange
[0]
871 cleanOpcodes(subOpcodes
, dispatcher
, dispatcher
.getChildren(oldNode
),
872 dispatcher
.getChildren(newNode
))
874 while j
< len(subOpcodes
):
875 prev
= Opcode(subOpcodes
[j
- 1])
876 this
= Opcode(subOpcodes
[j
])
877 if (this
.getCommand() != Opcode
.Descend
878 and prev
.getCommand() == this
.getCommand()):
879 # Merge adjacing opcodes of same type
880 prevOld
= prev
.getOldRange()
881 prevNew
= prev
.getNewRange()
882 thisOld
= this
.getOldRange()
883 thisNew
= this
.getNewRange()
884 prev
.setOldRange(( prevOld
[0], thisOld
[1], ))
885 prev
.setNewRange(( prevNew
[0], thisNew
[1], ))
886 subOpcodes
[j
- 1:j
+ 1] = [ prev
.asTuple(), ]
889 opcode
.setSubOpcodes(subOpcodes
)
890 if len(subOpcodes
) == 1:
891 subOpcode
= Opcode(subOpcodes
[0])
892 if subOpcode
.getCommand() == Opcode
.Descend
:
894 elif subOpcode
.getCommand() == Opcode
.Replace
:
895 if any([ isinstance(oldNode
, cls
)
896 for cls
in replaceNotUp
]):
898 if any([ isinstance(oldNode
, cls
)
899 and isinstance(oldNode
.parent
, parentCls
)
900 for ( cls
, parentCls
, ) in replaceUpSiblings
]):
901 # If for instance a section/title would
902 # propagate a replacement up the propagation
903 # needs to be done if all siblings would
904 # also propagate a replacement up
905 mightReplaceUpSiblings
.append(i
)
911 # Propagate 1-element sequences up
912 opcode
.setCommand(subOpcode
.getCommand())
913 opcodes
[i
] = opcode
.asTuple()
915 if mightReplaceUpSiblings
:
916 # There are entries which might propagate a replace up if all
917 # siblings could do as well
918 if all([ i
in mightReplaceUpSiblings
919 or Opcode(opcodes
[i
]).getCommand() == Opcode
.Replace
920 for i
in range(len(opcodes
)) ]):
921 # All entries are replacements which may propagate up -
922 # actually propagate elements which may propagate
923 for i
in mightReplaceUpSiblings
:
924 opcode
= Opcode(opcodes
[i
])
925 opcode
.setCommand(Opcode
.Replace
)
926 opcodes
[i
] = opcode
.asTuple()
928 def createDiff(oldTree
, newTree
, debug
=False):
929 """Create and return a diff document from `oldTree` to `newTree`."""
930 dispatcher
= DocutilsDispatcher()
931 dispatcher
.debug
= debug
932 opcodes
= doDiff(dispatcher
, oldTree
, newTree
)
934 from pprint
import pprint
935 print(oldTree
.asdom().toprettyxml())
936 print(newTree
.asdom().toprettyxml())
937 pprint(opcodes
, sys
.stdout
, 2, 40, None)
938 print("^^^ Before cleaning vvv After cleaning")
939 cleanOpcodes(opcodes
, dispatcher
, [ oldTree
], [ newTree
])
941 from pprint
import pprint
942 pprint(opcodes
, sys
.stdout
, 2, 40, None)
943 if len(opcodes
) != 1:
944 raise TypeError("Don't know how to merge documents which are not rootEq")
945 opcode
= Opcode(opcodes
[0])
946 if opcode
.getCommand() not in ( Opcode
.Descend
, Opcode
.Equal
, ):
947 # TODO There should be a sense making message for this case
948 # because this may happen due to up propagation of replacements
949 raise TypeError("Don't know how to merge top level opcode of type %r"
950 % ( opcode
.getCommand(), ))
952 diffDoc
= buildDocument(oldTree
, newTree
, opcodes
, pub
.settings
)
953 if opcode
.getCommand() == Opcode
.Equal
:
954 # TODO Equality should be reported somehow
955 diffDoc
.extend([ child
.deepcopy()
956 for child
in newTree
.children
])
958 buildTree(dispatcher
, diffDoc
, opcode
.getSubOpcodes(), oldTree
, newTree
)
961 if __name__
== '__main__':
962 pub
= processCommandLine()
964 oldTree
= readTree(pub
, pub
.settings
._old
_source
)
965 newTree
= readTree(pub
, pub
.settings
._new
_source
)
967 Text2Words(oldTree
).apply()
968 Text2Words(newTree
).apply()
970 diffDoc
= createDiff(oldTree
, newTree
)
971 Words2Text(diffDoc
).apply()
973 pub
.writer
.write(diffDoc
, pub
.destination
)
974 pub
.writer
.assemble_parts()
976 # TODO The CSS classes need to be set in a CSS stylesheet