3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
25 __docformat__
= 'reStructuredText'
29 locale
.setlocale(locale
.LC_ALL
, '')
35 from pprint
import pformat
38 from docutils
import frontend
, writers
, nodes
, SettingsSpec
39 from docutils
.core
import Publisher
40 from docutils
.utils
import SystemMessage
, Reporter
, new_reporter
, new_document
41 from docutils
.frontend
import OptionParser
, make_paths_absolute
, validate_boolean
42 from docutils
.transforms
import Transform
44 from treediff
import TreeMatcher
, HashableNodeImpl
46 ###############################################################################
47 ###############################################################################
48 # Command line specification
# Short program description shown in the command line help.
description = ("""Generates a structural diff from two reStructuredText input
documents and produces an annotated result. """)

# Name of the option selecting the output writer.
writerOption = 'writer'

# Pattern matching a ``--writer=<name>`` argument; group 1 is the name.
writerArgRE1 = '^--' + writerOption + '=' + '(.*)$'
def switchOptionsCallback(option, opt, value, parser, to):
    """Callback for `optparse`.

    Redirects the options parsed so far on ``parser.values`` to the input
    document named by `to` by delegating to `switchOptions`.  `option`,
    `opt` and `value` are part of the callback signature and are unused.
    """
    switchOptions(parser.values, to)
68 (('Select writer to write output with (default "xml").',
69 ['--' + writerOption
],
71 ('Following options apply to the old input document'
72 + ' (default: both input documents).',
74 { 'action': 'callback',
75 'callback': switchOptionsCallback
,
76 'callback_args': ( oldOption
, ),
78 ('Following options apply to the new input document'
79 + ' (default: both input documents).',
81 { 'action': 'callback',
82 'callback': switchOptionsCallback
,
83 'callback_args': ( newOption
, ),
85 ('Following options apply to both input documents'
88 { 'action': 'callback',
89 'callback': switchOptionsCallback
,
90 'callback_args': ( bothOption
, ),
92 ('Compare sections by comparing their names (default); '
93 + 'useful when section titles are stable but sections change',
94 ['--compare-sections-by-names'],
95 { 'action': 'store_true',
96 'default': 1, 'validator': validate_boolean
}),
97 ('Compare sections normally; useful when section titles change',
98 ['--compare-sections-normally'],
99 { 'action': 'store_false', 'dest': 'compare_sections_by_names'}),
# Defaults overriding those of the docutils components.
settings_defaults = {
    'output_encoding_error_handler': 'xmlcharrefreplace',
    writerOption: writerDefault,
    }

# Name of the configuration file section read for this program.
config_section = 'rstdiff'

# Usage line shown in the command line help.
usage = '%prog [options]... <old> [<new> [<output>]]'
110 ###############################################################################
111 # Classes for three argument command lines
113 switchableMultiOptions
= ( 'strip_elements_with_classes', 'strip_classes', )
114 switchableOptions
= (
115 'title', 'generator', 'datestamp',
116 'source_link', 'source_url',
117 'toc_backlinks', 'footnote_backlinks',
118 'sectnum_xform', 'doctitle_xform', 'docinfo_xform', 'sectsubtitle_xform',
120 'input_encoding', 'input_encoding_error_handler',
122 'pep_references', 'pep_base_url', 'pep_file_url_template',
123 'rfc_references', 'rfc_base_url',
124 'trim_footnote_reference_space',
125 'file_insertion_enabled', 'raw_enabled',
126 'auto_id_prefix', 'id_prefix',
127 ) + switchableMultiOptions
129 def switchOptions(values
, to
):
130 """Switch `values` so following options apply to input document `to`."""
131 lastTo
= getattr(values
, '_optionsTo', '_' + bothOption
)
132 lastTarget
= getattr(values
, lastTo
, None)
135 setattr(values
, lastTo
, lastTarget
)
136 target
= getattr(values
, '_' + to
, None)
139 setattr(values
, to
, target
)
140 for opt
in switchableOptions
:
141 if hasattr(values
, opt
):
143 lastTarget
[opt
] = getattr(values
, opt
)
147 setattr(values
, opt
, target
[opt
])
148 values
._optionsTo
= '_' + to
150 def useOptions(values
, to
):
151 """Set `values` so use options applying to input document `to`."""
152 for opt
in switchableOptions
:
153 if hasattr(values
, opt
):
155 for src
in ( '_' + to
, '_' + bothOption
, ):
156 if hasattr(values
, src
) and opt
in getattr(values
, src
):
157 if opt
in switchableMultiOptions
:
158 if not hasattr(values
, opt
):
159 setattr(values
, opt
, [])
160 if getattr(values
, src
)[opt
] is not None:
161 getattr(values
, opt
).extend(getattr(values
, src
)[opt
])
163 setattr(values
, opt
, getattr(values
, src
)[opt
])
class Publisher3Args(Publisher):
    """A `Publisher` building its option parser from `OptionParser3Args`."""

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """Create and return the `OptionParser3Args` for this publisher.

        `usage` and `description` are passed on to the parser,
        `settings_spec` is used as an additional component and `defaults`
        become the parser defaults.  When `config_section` is given it is
        recorded on `settings_spec`, which is created if missing.
        """
        # NOTE(review): guard and return restored following
        # `docutils.core.Publisher.setup_option_parser` -- confirm.
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser3Args(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
        return option_parser
class OptionParser3Args(OptionParser):
    """An `OptionParser` accepting ``<old> [<new> [<output>]]``."""

    def check_values(self, values, args):
        """Store positional arguments as runtime settings.

        Sets ``_old_source``, ``_new_source``, ``_destination`` and
        ``_config_files`` on `values` and returns `values`.
        """
        # Local import keeps this method self-contained.
        import os

        # Complete a possible switch
        switchOptions(values, bothOption)
        ( values._old_source, values._new_source,
          values._destination, ) = self.check_args(args)
        make_paths_absolute(values.__dict__, self.relative_path_settings,
                            os.getcwd())
        values._config_files = self.config_files
        return values

    def check_args(self, args):
        """Split `args` into ``(old_source, new_source, destination)``.

        An argument of ``-`` or a missing argument yields ``None`` (standard
        input / output).  Reports an error through ``self.error`` for no
        arguments, more than three arguments, both sources on standard
        input, or a destination equal to one of the sources.  `args` is
        consumed.
        """
        old_source = new_source = destination = None
        if not args:
            self.error('At least 1 argument required.')
        else:
            old_source = args.pop(0)
            if old_source == '-':           # means stdin
                old_source = None
        if args:
            new_source = args.pop(0)
            if new_source == '-':           # means stdin
                new_source = None
        if args:
            destination = args.pop(0)
            if destination == '-':          # means stdout
                destination = None
        if args:
            self.error('Maximum 3 arguments allowed.')
        if old_source is None and new_source is None:
            self.error('Old and new source may not both use stdin.')
        if (old_source and old_source == destination
            or new_source and new_source == destination):
            self.error('Do not specify the same file for both source and '
                       'destination. It will clobber the source file.')
        return old_source, new_source, destination
223 ###############################################################################
224 ###############################################################################
class Opcode(object):
    """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`

    An opcode is a sequence ``(command, oldBegin, oldEnd, newBegin,
    newEnd)``.  A `Descend` opcode carries the list of sub-opcodes as a
    sixth element.
    """

    # Command names.  'descend' is the value named in `getSubOpcodes`; the
    # other values presumably follow the `difflib` opcode tags -- TODO
    # confirm against `TreeMatcher.get_opcodes()`.
    Replace = 'replace'
    Delete = 'delete'
    Insert = 'insert'
    Equal = 'equal'
    Descend = 'descend'

    # Mutable copy of the opcode tuple
    _tuple = None

    def __init__(self, opcodeTuple):
        """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`"""
        self._tuple = list(opcodeTuple)

    def getCommand(self):
        """Return the command."""
        return self._tuple[0]

    def getOldRange(self):
        """Returns the range pertaining to an old list."""
        return ( self._tuple[1], self._tuple[2], )

    def getNewRange(self):
        """Returns the range pertaining to a new list."""
        return ( self._tuple[3], self._tuple[4], )

    def getSubOpcodes(self):
        """Return the sub-opcodes in case of `command` == 'descend' or
        ``None`` for any other command."""
        if self._tuple[0] != self.Descend:
            return None
        return self._tuple[5]

    def resolveOpcode(self, oldList, newList):
        """Resolves opcode pertaining to `oldList` and `newList`. Returns
        tuple consisting of

        command
          Same as self.getCommand().

        oldRange
          The range of elements in `oldList` affected by the opcode.

        newRange
          The range of elements in `newList` affected by the opcode.

        subOpcodes
          Same as self.getSubOpcodes().
        """
        oldRange = self.getOldRange()
        newRange = self.getNewRange()
        return ( self.getCommand(), oldList[oldRange[0]:oldRange[1]],
                 newList[newRange[0]:newRange[1]], self.getSubOpcodes())

    def setSubOpcodes(self, opcodes):
        """Set the sub-opcodes to a new list.

        Raises `TypeError` if the command is not `Descend`."""
        if self._tuple[0] != self.Descend:
            raise TypeError("Can not set subopcodes of a %r opcode"
                            % ( self._tuple[0], ))
        self._tuple[5] = opcodes

    def setCommand(self, command):
        """Set a new command adapting subopcodes."""
        if self._tuple[0] == command:
            return
        self._tuple[0] = command
        # Only a descend opcode has a slot for sub-opcodes; rebuild the
        # list so the slot exists exactly when it is needed
        adapted = self._tuple[0:5]
        if command == self.Descend:
            adapted.append([ ])
        self._tuple = adapted

    def setOldRange(self, range):
        """Sets the range pertaining to an old list."""
        ( self._tuple[1], self._tuple[2], ) = range

    def setNewRange(self, range):
        """Sets the range pertaining to a new list."""
        ( self._tuple[3], self._tuple[4], ) = range

    def asTuple(self):
        """Return the opcode as a tuple."""
        return tuple(self._tuple)
311 ###############################################################################
312 ###############################################################################
313 # Additional docutils stuff
315 ###############################################################################
318 class White(nodes
.Text
):
319 """A piece of text containing only whitespace."""
323 """A regular expression matching strings for this class and returning
324 them as the first match."""
325 # TODO Could be subject to an option
328 class Word(nodes
.Text
):
329 """A piece of text containing exactly one word."""
335 """Splits text and returns a sequence of `Word` and `White`
336 objects. Returns an empty sequence for an empty `text`."""
338 subs
= re
.split(White
.re
, text
.astext())
342 elif re
.match(White
.re
, subs
[0]):
343 ( current
, next
, ) = ( White
, Word
, )
345 ( current
, next
, ) = ( Word
, White
, )
347 result
.append(current(sub
))
348 ( current
, next
, ) = ( next
, current
, )
351 ###############################################################################
class Text2Words(Transform):
    """Transforms a `Text` node into a sequence of `Word`/`White`."""

    # NOTE(review): method name restored as `apply`, the entry point
    # docutils calls on a `Transform` -- confirm.
    def apply(self, **kwargs):
        """Walk the document with a `Text2WordsVisitor`.  Keyword
        arguments are accepted and ignored."""
        self.document.walk(Text2WordsVisitor(self.document))
360 class Text2WordsVisitor(nodes
.SparseNodeVisitor
):
362 def visit_Text(self
, text
):
363 words
= Word
.splitText(text
)
366 words
= [ White(''), ]
367 text
.parent
.replace(text
, words
)
class Words2Text(Transform):
    """Transforms a sequence of `Word`/`White` into a `Text` node."""

    # NOTE(review): method name restored as `apply`, the entry point
    # docutils calls on a `Transform` -- confirm.
    def apply(self, **kwargs):
        """Walk the document with a `Words2TextVisitor`.  Keyword
        arguments are accepted and ignored."""
        self.document.walk(Words2TextVisitor(self.document))
375 class Words2TextVisitor(nodes
.SparseNodeVisitor
):
377 def visit_Text(self
, text
):
379 # Find this node and the first node of the sequence it belongs to
381 for i
in range(len(parent
)):
382 if not isinstance(parent
[i
], nodes
.Text
):
386 # ``parent.index(text)`` uses value equality - can not be
387 # used here to find `text`
388 if id(parent
[i
]) == id(text
):
392 raise IndexError("Can not find %r in its parent" % ( text
, ))
394 if (len(parent
) > end
395 and isinstance(parent
[end
], nodes
.Text
)):
396 # The visitor processes following children even if they are
397 # deleted - so work for last node of a sequence
400 texts
= nodes
.Text(reduce(lambda s
, node
: s
+ node
.astext(),
401 parent
[first
:end
], ""))
402 parent
[first
:end
] = ( texts
, )
404 visit_White
= visit_Text
406 visit_Word
= visit_Text
408 ###############################################################################
409 ###############################################################################
412 class DocutilsDispatcher(HashableNodeImpl
):
413 """Implements hashable for a docutils `Node` and supports construction."""
def __init__(self, reporter):
    """Initialize for `nodes.Node` and keep `reporter` for debug output."""
    # Name the class explicitly: ``super(self.__class__, self)`` recurses
    # infinitely as soon as this class is subclassed
    super(DocutilsDispatcher, self).__init__(nodes.Node)
    self.reporter = reporter
def dispatchClass(self, function, node, *args):
    """Dispatch a call of type `function` for the class of `node` using
    arguments `node` and `args`. Default is to dispatch for imaginary class
    UNKNOWN.

    The method called is ``<function>_<class name of node>`` if it exists
    and ``<function>_UNKNOWN`` otherwise.  The call, its arguments and its
    result are reported through ``self.reporter.debug``.  Returns the result
    of the method called.
    """
    pat = "%s_%%s" % ( function, )
    try:
        name = pat % ( node.__class__.__name__, )
        method = getattr(self, name)
    except AttributeError:
        name = pat % ( 'UNKNOWN', )
        method = getattr(self, name)
    everyArg = ( node, ) + args
    self.reporter.debug("*** %s(%s)"
                        % ( name, ", ".join([ arg.__class__.__name__
                                              for arg in everyArg ]), ))
    for arg in everyArg:
        try:
            self.reporter.debug(" > %s" % ( arg, ))
        except UnicodeEncodeError:
            # NOTE(review): format argument restored by assumption
            self.reporter.debug(" > CANNOT OUTPUT ARGUMENT OF TYPE %s"
                                % ( type(arg), ))
    result = method(node, *args)
    try:
        self.reporter.debug(" < %s" % ( result, ))
    except UnicodeEncodeError:
        # NOTE(review): format argument restored by assumption
        self.reporter.debug(" < CANNOT OUTPUT RESULT OF TYPE %s"
                            % ( type(result), ))
    return result
450 ###########################################################################
451 ###########################################################################
452 # Implementation of abstract methods for `HashableNodeImpl`
def rootHash(self, node):
    """Return a hash for the root only. Dispatches to the ``rootHash_*``
    method for the class of `node`."""
    return self.dispatchClass('rootHash', node)
def rootHash_UNKNOWN(self, node):
    """Hash `node` as root by its class only."""
    return hash(node.__class__)
def rootEq(self, node, other):
    """Returns root equality of `node` and an `other` node. ``True`` if
    the two nodes as roots are equal without considering their
    children. This should be true if one node can be replaced by
    the other and all changes can be represented without changing
    the node itself. Dispatches to the ``rootEq_*`` method for the class
    of `node`."""
    # Only nodes of the same class can be equal - this assumption
    # is used in many places
    if node.__class__ != other.__class__:
        return False
    return self.dispatchClass('rootEq', node, other)
def rootEq_UNKNOWN(self, node, other):
    """Default root equality for nodes of the same class."""
    # Unless we know better two roots of the same type are considered equal
    return True
def childHash(self, node):
    """Return a hash for the node as a child. Dispatches to the
    ``childHash_*`` method for the class of `node`."""
    return self.dispatchClass('childHash', node)
def childHash_UNKNOWN(self, node):
    """Default child hash: the result of ``self.childrenHash(node)``."""
    # By default compare as a child by comparing children
    return self.childrenHash(node)
def childEq(self, node, other):
    """Returns equality of `node` and an `other` node as children.
    ``True`` if the child features of the two nodes are equal
    without considering the root. Dispatches to the ``childEq_*`` method
    for the class of `node`."""
    # Only nodes of the same class can be equal - this assumption
    # is used in many places
    if node.__class__ != other.__class__:
        return False
    return self.dispatchClass('childEq', node, other)
def childEq_UNKNOWN(self, node, other):
    """Default child equality: ``self.childrenEq(node, other)``."""
    # By default compare as a child by comparing children
    return self.childrenEq(node, other)
def getChildren(self, node):
    """Return the children of `node` as a list. Dispatches to the
    ``getChildren_*`` method for the class of `node`."""
    return self.dispatchClass('getChildren', node)
507 def getChildren_UNKNOWN(self
, node
):
510 ###########################################################################
511 ###########################################################################
514 # TODO The resulting class names should be configurable
515 NewDelete
= 'removed'
517 NewReplaced
= 'replaced'
518 NewReplacement
= 'replacement'
def copyRoot(self, node):
    """Copy `node` as root and return it. Dispatches to the ``copyRoot_*``
    method for the class of `node`."""
    return self.dispatchClass('copyRoot', node)
524 def copyRoot_UNKNOWN(self
, node
):
def addChild(self, root, child):
    """Add `child` to `root`. Dispatches to the ``addChild_*`` method for
    the class of `root`."""
    return self.dispatchClass('addChild', root, child)
531 def addChild_UNKNOWN(self
, root
, child
):
def copyChild(self, node, newType):
    """Copy `node` as child and return it. `newType` is ``None`` for an
    unchanged child or the change type. Dispatches to the ``copyChild_*``
    method for the class of `node`."""
    return self.dispatchClass('copyChild', node, newType)
def copyChild_UNKNOWN(self, node, newType):
    """Default child copy: a deep copy of `node` marked with `newType`
    through `setNewType`."""
    return self.setNewType(node.deepcopy(), newType)
def copyChildren(self, head, tail, root, newType):
    """Return a range of new nodes copied from [ `head` ] + `tail` under
    `root`. `tail` are all the same class as `head`. Nodes are
    created appropriate to type `newType`. Dispatches to the
    ``copyChildren_*`` method for the class of `head`."""
    return self.dispatchClass('copyChildren', head, tail, root, newType)
def copyChildren_UNKNOWN(self, head, tail, root, newType):
    """Default: copy `head` and every node in `tail` one by one with
    `copyChild` and return the copies as a list. `root` is unused."""
    return [ self.copyChild(child, newType)
             for child in [ head, ] + tail ]
552 def copyRange(self
, root
, children
, newType
):
553 """Return a range of new nodes copied from `children` under `root`.
554 Nodes are created appropriate to type `newType`."""
557 while begin
< len(children
):
558 first
= children
[begin
]
560 while end
< len(children
):
562 if not(first
.__class
__ == last
.__class
__
563 or (isinstance(first
, nodes
.Text
)
564 and isinstance(last
, nodes
.Text
))):
567 result
.extend(self
.copyChildren(first
, children
[begin
+ 1:end
],
def mergeChildren(self, diffRoot, oldRoot, newRoot,
                  command, oldRange, newRange):
    """Add children to `diffRoot` merging children `oldRange` / `newRange`
    of `oldRoot` / `newRoot` by `command`.

    Equal children are copied unchanged from `oldRange`, inserted ones from
    `newRange`, deleted ones from `oldRange`.  A replacement adds the old
    children followed by the new ones.  Raises `TypeError` for any other
    `command`.
    """
    if command == Opcode.Equal:
        for old in oldRange:
            self.addChild(diffRoot, self.copyChild(old, None))
    elif command == Opcode.Insert or command == Opcode.Delete:
        if command == Opcode.Insert:
            srcRoot = newRoot
            srcRange = newRange
            newType = self.NewInsert
        else:
            srcRoot = oldRoot
            srcRange = oldRange
            newType = self.NewDelete
        for newChild in self.copyRange(srcRoot, srcRange, newType):
            self.addChild(diffRoot, newChild)
    elif command == Opcode.Replace:
        # TODO Replacement doubles elements. This needs to be
        # reflected properly in the @ids. If the @ids don't change
        # there need to be unique @ids for replaced elements. This
        # needs also to be reflected in referring attributes.
        for newChild in self.copyRange(oldRoot, oldRange,
                                       self.NewReplaced):
            self.addChild(diffRoot, newChild)
        for newChild in self.copyRange(newRoot, newRange,
                                       self.NewReplacement):
            self.addChild(diffRoot, newChild)
    else:
        raise TypeError("Unhandled command %r" % ( command, ))
605 ###########################################################################
606 ###########################################################################
def setNewType(self, node, newType):
    """Set a class on `node` for `newType` if set. Returns `node`.

    The class appended to ``node['classes']`` is ``change-<newType>``."""
    if newType:
        node['classes'].append("change-%s" % ( newType, ))
    return node
615 ###########################################################################
616 ###########################################################################
617 # Real comparison and merging
619 # The idea is like this: Each node has attributes which need to be
620 # compared as root and it has attributes which need to be compared
621 # as child. This is different for every node type.
623 # Similarly each node type may need special methods for cloning
626 ###########################################################################
627 # Text / Word / White
def rootHash_Text(self, node):
    """Hash a text node by its text."""
    return hash(node.astext())

rootHash_Word = rootHash_Text
634 def rootHash_White(self
, node
):
635 # Whitespace compares all equal
def rootEq_Text(self, node, other):
    """Two text nodes are equal if their texts are equal."""
    return node.astext() == other.astext()

rootEq_Word = rootEq_Text
643 def rootEq_White(self
, node
, other
):
644 # TODO Must behave different for places where whitespace
645 # differences are relevant
648 # Text behaves the same as root or child
650 childHash_Text
= rootHash_Text
651 childHash_Word
= rootHash_Word
652 childHash_White
= rootHash_White
654 childEq_Text
= rootEq_Text
655 childEq_Word
= rootEq_Word
656 childEq_White
= rootEq_White
658 def copyChildren_Text(self
, head
, tail
, root
, newType
):
659 if not tail
and isinstance(head
, nodes
.Text
) and not head
.astext():
660 # Do not create empty inlines
662 inline
= nodes
.inline()
663 self
.setNewType(inline
, newType
)
664 inline
.extend([ head
, ] + tail
)
667 # Sequences of Text are treated together
668 copyChildren_Word
= copyChildren_Text
669 copyChildren_White
= copyChildren_Text
671 ###########################################################################
674 def getSectionName(self
, node
):
676 return node
['dupnames'][0]
678 return node
['names'][0]
679 return node
['ids'][0]
def rootEq_section(self, node, other):
    """Compare sections by their names or normally.

    With the ``compare_sections_by_names`` setting of the document of
    `node` the section names as returned by `getSectionName` are compared.
    """
    if node.document.settings.compare_sections_by_names:
        return self.getSectionName(node) == self.getSectionName(other)
    # NOTE(review): "normally" restored as the default root equality
    # (always equal for the same class) -- confirm.
    return True
687 ###########################################################################
688 # For some elements their attributes need to be considered to
def attributeEq(self, node, other, attribute):
    """Return whether `node` and `other` agree on `attribute`.

    They agree if both lack the attribute or both have it with equal
    values."""
    if (attribute in node) != (attribute in other):
        # Present in exactly one of the nodes
        return False
    if attribute not in node:
        # Absent from both
        return True
    return node[attribute] == other[attribute]
698 ###########################################################################
def rootEq_reference(self, node, other):
    """References are equal as roots if their ``refuri`` agrees."""
    return self.attributeEq(node, other, 'refuri')
704 ###########################################################################
def rootEq_target(self, node, other):
    """Targets are equal as roots if their ``refuri`` agrees."""
    return self.attributeEq(node, other, 'refuri')
710 ###########################################################################
713 # TODO This is typically a minor change and should be requested by
def attributeEq_bullet_list(self, node, other):
    """Bullet lists agree in their attributes if ``bullet`` agrees."""
    return self.attributeEq(node, other, 'bullet')
def rootEq_bullet_list(self, node, other):
    """Bullet lists are equal as roots if their attributes agree."""
    return self.attributeEq_bullet_list(node, other)
def childEq_bullet_list(self, node, other):
    """Bullet lists are equal as children if their attributes and their
    children agree."""
    return (self.attributeEq_bullet_list(node, other)
            and self.childrenEq(node, other))
726 ###########################################################################
729 # TODO This is typically a minor change and should be requested by
def attributeEq_enumerated_list(self, node, other):
    """Enumerated lists agree in their attributes if ``enumtype``,
    ``prefix``, ``suffix`` and ``start`` all agree."""
    return (self.attributeEq(node, other, 'enumtype')
            and self.attributeEq(node, other, 'prefix')
            and self.attributeEq(node, other, 'suffix')
            and self.attributeEq(node, other, 'start'))
def rootEq_enumerated_list(self, node, other):
    """Enumerated lists are equal as roots if their attributes agree."""
    return self.attributeEq_enumerated_list(node, other)
def childEq_enumerated_list(self, node, other):
    """Enumerated lists are equal as children if their attributes and
    their children agree."""
    return (self.attributeEq_enumerated_list(node, other)
            and self.childrenEq(node, other))
745 ###########################################################################
def rootEq_image(self, node, other):
    """Images are equal as roots if they are of the same class and their
    ``uri`` agrees."""
    if node.__class__ != other.__class__:
        return False
    return self.attributeEq(node, other, 'uri')
753 ###########################################################################
754 # Some elements may contain only #PCDATA. They need to propagate
755 # changes in their children up to the element itself.
def rootEqWithChildren(self, node, other):
    """Root equality for elements propagating changes of their children
    up: equal if of the same class and the children are equal."""
    if node.__class__ != other.__class__:
        return False
    return self.childrenEq(node, other)
762 ###########################################################################
765 rootEq_comment
= rootEqWithChildren
767 ###########################################################################
770 rootEq_literal
= rootEqWithChildren
772 ###########################################################################
775 rootEq_option_string
= rootEqWithChildren
777 ###########################################################################
780 # TODO This is typically a minor change and should be requested by
783 rootEq_label
= rootEqWithChildren
785 ###########################################################################
788 # TODO This is typically a minor change and should be requested by
791 rootEq_footnote_reference
= rootEqWithChildren
793 ###########################################################################
796 # TODO This is typically a minor change and should be requested by
799 rootEq_citation_reference
= rootEqWithChildren
801 ###########################################################################
802 # For some elements their attributes need to be considered to
803 # detect changes *and* they may contain only #PCDATA.
805 ###########################################################################
808 # TODO This is typically a minor change and should be requested by
def attributeEq_option_argument(self, node, other):
    """Option arguments agree in their attributes if ``delimiter``
    agrees."""
    return self.attributeEq(node, other, 'delimiter')
def rootEq_option_argument(self, node, other):
    """Option arguments are equal as roots if their attributes agree and
    `rootEqWithChildren` holds."""
    return (self.attributeEq_option_argument(node, other)
            and self.rootEqWithChildren(node, other))
def childEq_option_argument(self, node, other):
    """Option arguments are equal as children if their attributes and
    their children agree."""
    return (self.attributeEq_option_argument(node, other)
            and self.childrenEq(node, other))
822 ###########################################################################
823 # A change in certain elements must propagate the change up since
824 # they may occur only once. Must be done by parents.
# Checks whether `node` and `other` both have a child of type
# `childClass` and whether the first of those are equal.
def rootEqWithChild(self, node, other, childClass):
    """Root equality depending on one particular child.

    Returns whether `node` and `other` are of the same class, both have a
    child of type `childClass` and the first such children are equal as
    children."""
    if node.__class__ != other.__class__:
        return False

    nodeFound = None
    for nodeChild in self.getChildren(node):
        if isinstance(nodeChild, childClass):
            nodeFound = nodeChild
            break

    otherFound = None
    for otherChild in self.getChildren(other):
        if isinstance(otherChild, childClass):
            otherFound = otherChild
            break

    if nodeFound is None or otherFound is None:
        # NOTE(review): result for a missing child restored from the
        # description of this method -- confirm.
        return False

    return self.childEq(nodeFound, otherFound)
849 ###########################################################################
def rootEq_footnote(self, node, other):
    """Footnotes are equal as roots if their first labels are equal."""
    return self.rootEqWithChild(node, other, nodes.label)
855 ###########################################################################
def rootEq_citation(self, node, other):
    """Citations are equal as roots if their first labels are equal."""
    return self.rootEqWithChild(node, other, nodes.label)
861 ###########################################################################
def rootEq_option(self, node, other):
    """Options are equal as roots if their first option strings are
    equal."""
    return self.rootEqWithChild(node, other, nodes.option_string)
867 ###########################################################################
868 # Some attributes of some elements depend on their concrete parents.
871 def copyRoot_tgroup(self
, node
):
873 copy
['origcols'] = copy
['cols']
877 def addChild_tgroup(self
, root
, child
):
879 # This works only if for each column there is a `colspec`. Is
881 if isinstance(child
, nodes
.colspec
):
883 elif isinstance(child
, nodes
.tbody
):
884 # All columns seen - check the column widths
885 if root
['origcols'] != root
['cols']:
887 if isinstance(elem
, nodes
.colspec
):
888 elem
['colwidth'] = 100 / root
['cols']
891 # TODO Number of entries must change according to the (changed)
892 # number of columns; for added or removed columns entries of *one*
893 # column must be added / removed
895 ###############################################################################
896 ###############################################################################
def processCommandLine():
    """Process command line and return a `Publisher`.

    The writer is determined from the raw arguments before the regular
    option parsing because the writer contributes options itself.  A
    mismatch between the pre-parsed and the finally parsed writer is
    reported as a severe error.
    """
    # Local imports keep this function self-contained.
    import re
    import sys

    # Determine writer here so options can be given normally
    preWriter = writerDefault
    # NOTE(review): argument source restored as ``sys.argv`` -- confirm.
    for arg in sys.argv:
        match = re.search(writerArgRE1, arg)
        if match:
            preWriter = match.group(1)

    pub = Publisher3Args()
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_writer(preWriter)

    settingsSpec = SettingsSpec()
    settingsSpec.settings_spec = settings_spec
    settingsSpec.settings_defaults = settings_defaults
    pub.process_command_line(usage=usage, description=description,
                             settings_spec=settingsSpec,
                             config_section=config_section)
    if pub.settings.writer != preWriter:
        new_reporter('<cmdline>',
                     pub.settings).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
                                          % ( preWriter, pub.settings.writer, ))
    pub.set_destination()
    return pub
def readTree(pub, sourceName):
    """Read and return a tree from `sourceName`.

    The document is read with the reader, parser and settings of `pub`,
    stored as ``pub.document`` and returned after the transforms of `pub`
    were applied."""
    # Reset reader - just in case it keeps state from a previous invocation
    pub.set_reader('standalone', None, 'restructuredtext')
    pub.set_source(None, sourceName)
    pub.document = pub.reader.read(pub.source, pub.parser, pub.settings)
    pub.apply_transforms()
    return pub.document
def doDiff(hashableNodeImpl, oldTree, newTree):
    """Create a difference from `oldTree` to `newTree` using
    `hashableNodeImpl`. Returns the opcodes necessary to transform
    `oldTree` to `newTree`."""
    # The fourth argument tells the matcher which nodes are `White`
    matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree,
                          lambda node: isinstance(node, White))
    return matcher.get_opcodes()
def buildDocument(oldTree, newTree, settings):
    """Returns a new document for the result of converting `oldTree` to
    `newTree`.

    The source path of the new document names the old and the new source
    taken from `settings`.  Raises `TypeError` unless both trees are
    documents."""
    # `nodes` is the `docutils.nodes` module imported by this file
    if (not isinstance(oldTree, nodes.document)
        or not isinstance(newTree, nodes.document)):
        raise TypeError("Roots of trees must be documents")
    return new_document(u"%s => %s"
                        % ( settings._old_source, settings._new_source, ),
                        settings)
def buildTree(dispatcher, diffRoot, opcodes, oldRoot, newRoot):
    """Adds a new sub-tree under `diffRoot` converting children of
    `oldRoot` to `newRoot` using `opcodes`."""
    oldChildren = dispatcher.getChildren(oldRoot)
    newChildren = dispatcher.getChildren(newRoot)
    for opcode in opcodes:
        ( command, oldRange, newRange,
          subOpcodes, ) = Opcode(opcode).resolveOpcode(oldChildren,
                                                       newChildren)
        if command == Opcode.Descend:
            # Copy the old node as root of the sub-tree and recurse into
            # the children of the old and the new node
            child = dispatcher.copyRoot(oldRange[0])
            dispatcher.addChild(diffRoot, child)
            buildTree(dispatcher, child,
                      subOpcodes, oldRange[0], newRange[0])
        else:
            dispatcher.mergeChildren(diffRoot, oldRoot, newRoot,
                                     command, oldRange, newRange)
970 # A replacement in certain elements must not be propagated up since
971 # they may occur only once and replacement would double them
replaceNotUp = (
    nodes.title, nodes.subtitle, nodes.term, nodes.field_name,
    nodes.attribution, nodes.caption, # (%text.model)
    nodes.header, nodes.footer, nodes.definition,
    nodes.field_body, nodes.description, nodes.legend,
    nodes.entry, # (%body.elements;+) or (%body.elements;*)
    nodes.decoration, nodes.docinfo, nodes.transition,
    nodes.option_group, nodes.thead,
    nodes.tbody, # different content model
    )
982 # A replacement in certain elements normally not subject to up
983 # propagation and contained in certain elements may propagate up if
984 # all their siblings are also replacements and would propagate up
# Pairs of ( element class, class of the containing element, )
replaceUpSiblings = (
    ( nodes.title, nodes.section, ),
    ( nodes.subtitle, nodes.section, ),
    ( nodes.term, nodes.definition_list_item, ),
    ( nodes.field_name, nodes.field, ),
    ( nodes.attribution, nodes.block_quote, ),
    ( nodes.caption, nodes.figure, ),
    ( nodes.definition, nodes.definition_list_item, ),
    ( nodes.field_body, nodes.field, ),
    ( nodes.description, nodes.option_list_item, ),
    ( nodes.legend, nodes.figure, ),
    ( nodes.option_group, nodes.option_list_item, ),
    )
999 # TODO If much text is replaced in a text element the whole element
1000 # should be replaced. This makes more sense to people than two large
1001 # replaced/replacement blocks where the only equality is in words like
1002 # "the". The exact meaning of "much" should be an option.
def _mergeAdjacentOpcodes(opcodes):
    """Merge adjacent entries of `opcodes` having the same command.

    `opcodes` is modified in place. Entries whose command is
    `Opcode.Descend` are never merged. A merged entry starts where the
    first entry starts and ends where the second entry ends - for the old
    range as well as for the new range."""
    j = 1
    while j < len(opcodes):
        prev = Opcode(opcodes[j - 1])
        this = Opcode(opcodes[j])
        if (this.getCommand() != Opcode.Descend
            and prev.getCommand() == this.getCommand()):
            prevOld = prev.getOldRange()
            prevNew = prev.getNewRange()
            thisOld = this.getOldRange()
            thisNew = this.getNewRange()
            prev.setOldRange(( prevOld[0], thisOld[1], ))
            prev.setNewRange(( prevNew[0], thisNew[1], ))
            # Two entries collapse into one; `j` is not advanced so the
            # merged entry is compared with its new successor next
            opcodes[j - 1:j + 1] = [ prev.asTuple(), ]
        else:
            j += 1

def cleanOpcodes(opcodes, dispatcher, oldList, newList):
    """Replace some nasty results in `opcodes` by cleaner versions. Opcodes
    create `newList` from `oldList`.

    `opcodes` is modified in place. For every opcode which resolves to
    sub-opcodes

    * the sub-opcodes are cleaned recursively against the children of the
      first old and the first new node of the opcode,

    * adjacent sub-opcodes with the same command are merged unless the
      command is `Opcode.Descend`,

    * the command of a single remaining sub-opcode is propagated up to
      the opcode unless it is `Opcode.Descend` or it is `Opcode.Replace`
      for an old node which is an instance of a class in `replaceNotUp`.

    A replacement held back because of `replaceNotUp` is propagated up
    nevertheless if the node / parent combination is listed in
    `replaceUpSiblings` and every opcode in `opcodes` either is such a
    candidate or already is a replacement.

    `dispatcher` must provide ``getChildren(node)``."""
    # Indices of opcodes which may become a replacement if all siblings do
    mightReplaceUpSiblings = set()
    for i in range(len(opcodes)):
        opcode = Opcode(opcodes[i])
        ( command, oldRange, newRange, subOpcodes,
          ) = opcode.resolveOpcode(oldList, newList)
        if not subOpcodes:
            # Nothing to clean for flat or empty opcodes
            continue

        oldNode = oldRange[0]
        newNode = newRange[0]
        cleanOpcodes(subOpcodes, dispatcher, dispatcher.getChildren(oldNode),
                     dispatcher.getChildren(newNode))
        _mergeAdjacentOpcodes(subOpcodes)
        opcode.setSubOpcodes(subOpcodes)

        if len(subOpcodes) == 1:
            subCommand = Opcode(subOpcodes[0]).getCommand()
            if subCommand == Opcode.Descend:
                propagateUp = False
            elif subCommand == Opcode.Replace:
                propagateUp = not any(isinstance(oldNode, cls)
                                      for cls in replaceNotUp)
                if (not propagateUp
                    and any(isinstance(oldNode, cls)
                            and isinstance(oldNode.parent, parentCls)
                            for ( cls, parentCls, ) in replaceUpSiblings)):
                    # If for instance a section/title would
                    # propagate a replacement up the propagation
                    # needs to be done if all siblings would
                    # also propagate a replacement up
                    mightReplaceUpSiblings.add(i)
            else:
                propagateUp = True
            if propagateUp:
                # Propagate 1-element sequences up
                opcode.setCommand(subCommand)
        # Store unconditionally so merged sub-opcodes are kept as well
        opcodes[i] = opcode.asTuple()

    # There may be entries which might propagate a replace up if all
    # siblings could do as well
    if (mightReplaceUpSiblings
        and all(i in mightReplaceUpSiblings
                or Opcode(opcodes[i]).getCommand() == Opcode.Replace
                for i in range(len(opcodes)))):
        # All entries are replacements which may propagate up -
        # actually propagate elements which may propagate
        for i in sorted(mightReplaceUpSiblings):
            opcode = Opcode(opcodes[i])
            opcode.setCommand(Opcode.Replace)
            opcodes[i] = opcode.asTuple()
def createDiff(pub, oldTree, newTree):
    """Create and return a diff document from `oldTree` to `newTree`.

    The opcodes computed by `doDiff` are cleaned by `cleanOpcodes` and
    then used to build the result. If ``pub.settings.debug`` is set both
    trees and the opcodes before and after cleaning are sent to the
    reporter of `oldTree`.

    Raises `TypeError` if there is not exactly one top level opcode or if
    its command is neither `Opcode.Descend` nor `Opcode.Equal`."""
    dispatcher = DocutilsDispatcher(new_reporter("DIFF", pub.settings))
    opcodes = doDiff(dispatcher, oldTree, newTree)

    if pub.settings.debug:
        # This may be expensive so guard this explicitly
        oldTree.reporter.debug(oldTree.asdom().toprettyxml())
        newTree.reporter.debug(newTree.asdom().toprettyxml())
        oldTree.reporter.debug(pformat(opcodes, indent=2, width=40,
                                       depth=None))
        oldTree.reporter.debug("^^^ Before cleaning vvv After cleaning")

    cleanOpcodes(opcodes, dispatcher, [ oldTree ], [ newTree ])

    if pub.settings.debug:
        # This may be expensive so guard this explicitly
        oldTree.reporter.debug(pformat(opcodes, indent=2, width=40,
                                       depth=None))

    if len(opcodes) != 1:
        raise TypeError("Don't know how to merge documents which are not rootEq")
    opcode = Opcode(opcodes[0])
    if opcode.getCommand() not in ( Opcode.Descend, Opcode.Equal, ):
        # TODO There should be a sense making message for this case
        # because this may happen due to up propagation of replacements
        raise TypeError("Don't know how to merge top level opcode of type %r"
                        % ( opcode.getCommand(), ))

    diffDoc = buildDocument(oldTree, newTree, pub.settings)
    if opcode.getCommand() == Opcode.Equal:
        # TODO Equality should be reported somehow
        diffDoc.extend([ child.deepcopy()
                         for child in newTree.children ])
    else:
        # Only a descend needs merging; running this for equal documents
        # as well would add content to the already filled result
        buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree)
    return diffDoc
if __name__ == '__main__':
    # Script entry point: read the old and the new input document, create
    # the diff document and write it with the writer of the publisher
    pub = processCommandLine()

    # The settings are switched before each read and once more afterwards;
    # the order of these calls matters
    # NOTE(review): presumably `useOptions` activates the option set named
    # by its second argument - confirm against its definition
    useOptions(pub.settings, oldOption)
    oldTree = readTree(pub, pub.settings._old_source)
    useOptions(pub.settings, newOption)
    newTree = readTree(pub, pub.settings._new_source)
    useOptions(pub.settings, bothOption)

    # Both input trees are transformed before diffing and the result is
    # transformed afterwards
    # NOTE(review): going by the names the text is split into words for
    # the diff and joined again in the result - confirm against the
    # transform classes
    Text2Words(oldTree).apply()
    Text2Words(newTree).apply()

    diffDoc = createDiff(pub, oldTree, newTree)
    Words2Text(diffDoc).apply()

    pub.writer.write(diffDoc, pub.destination)
    pub.writer.assemble_parts()
# TODO The CSS classes need to be set in a CSS stylesheet