3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 Generates a structural diff from two reStructuredText input documents
22 and produces an annotated result.
25 __docformat__
= 'reStructuredText'
29 locale
.setlocale(locale
.LC_ALL
, '')
35 from pprint
import pformat
36 from optparse
import SUPPRESS_HELP
39 from docutils
import frontend
, writers
, nodes
, SettingsSpec
40 from docutils
.core
import Publisher
41 from docutils
.utils
import SystemMessage
, Reporter
, new_reporter
, new_document
42 from docutils
.frontend
import OptionParser
, make_paths_absolute
, validate_boolean
43 from docutils
.transforms
import Transform
45 from treediff
import TreeMatcher
, HashableNodeImpl
47 ###############################################################################
48 ###############################################################################
49 # Command line specification
51 description
= ("""Generates a structural diff from two reStructuredText input
52 documents and produces an annotated result. """)
54 writerOption
= 'writer'
56 writerArgRE1
= '^--' + writerOption
+ '=' + '(.*)$'
def switchOptionsCallback(option, opt, value, parser, to):
    """Callback for `optparse`: switch the parsed values to document `to`.

    Only `parser` and `to` are used; the remaining parameters are part
    of the callback signature and are ignored here.
    """
    parsedValues = parser.values
    switchOptions(parsedValues, to)
69 (('Select writer to write output with (default "xml").',
70 ['--' + writerOption
],
72 ('Following options apply to the old input document'
73 + ' (default: both input documents).',
75 { 'action': 'callback',
76 'callback': switchOptionsCallback
,
77 'callback_args': ( oldOption
, ),
79 ('Following options apply to the new input document'
80 + ' (default: both input documents).',
82 { 'action': 'callback',
83 'callback': switchOptionsCallback
,
84 'callback_args': ( newOption
, ),
86 ('Following options apply to both input documents'
89 { 'action': 'callback',
90 'callback': switchOptionsCallback
,
91 'callback_args': ( bothOption
, ),
93 ('Compare sections by comparing their names (default); '
94 + 'useful when section titles are stable but sections change',
95 ['--compare-sections-by-names'],
96 { 'action': 'store_true',
97 'default': 1, 'validator': validate_boolean
}),
98 ('Compare sections normally; useful when section titles change',
99 ['--compare-sections-normally'],
100 { 'action': 'store_false', 'dest': 'compare_sections_by_names'}),
101 (SUPPRESS_HELP
, ['--dump-rstdiff'], {'action': 'store_true'}),
105 settings_defaults
= {'output_encoding_error_handler': 'xmlcharrefreplace',
106 writerOption
: writerDefault
}
108 config_section
= 'rstdiff'
110 usage
= '%prog [options]... <old> [<new> [<output>]]'
112 ###############################################################################
113 # Classes for three argument command lines
115 switchableMultiOptions
= ( 'strip_elements_with_classes', 'strip_classes', )
116 switchableOptions
= (
117 'title', 'generator', 'datestamp',
118 'source_link', 'source_url',
119 'toc_backlinks', 'footnote_backlinks',
120 'sectnum_xform', 'doctitle_xform', 'docinfo_xform', 'sectsubtitle_xform',
122 'input_encoding', 'input_encoding_error_handler',
124 'pep_references', 'pep_base_url', 'pep_file_url_template',
125 'rfc_references', 'rfc_base_url',
126 'trim_footnote_reference_space',
127 'file_insertion_enabled', 'raw_enabled',
128 'auto_id_prefix', 'id_prefix',
129 ) + switchableMultiOptions
131 def switchOptions(values
, to
):
132 """Switch `values` so following options apply to input document `to`."""
133 lastTo
= getattr(values
, '_optionsTo', '_' + bothOption
)
134 lastTarget
= getattr(values
, lastTo
, None)
137 setattr(values
, lastTo
, lastTarget
)
138 target
= getattr(values
, '_' + to
, None)
141 setattr(values
, to
, target
)
142 for opt
in switchableOptions
:
143 if hasattr(values
, opt
):
145 lastTarget
[opt
] = getattr(values
, opt
)
149 setattr(values
, opt
, target
[opt
])
150 values
._optionsTo
= '_' + to
152 def useOptions(values
, to
):
153 """Set `values` so use options applying to input document `to`."""
154 for opt
in switchableOptions
:
155 if hasattr(values
, opt
):
157 for src
in ( '_' + to
, '_' + bothOption
, ):
158 if hasattr(values
, src
) and opt
in getattr(values
, src
):
159 if opt
in switchableMultiOptions
:
160 if not hasattr(values
, opt
):
161 setattr(values
, opt
, [])
162 if getattr(values
, src
)[opt
] is not None:
163 getattr(values
, opt
).extend(getattr(values
, src
)[opt
])
165 setattr(values
, opt
, getattr(values
, src
)[opt
])
168 class Publisher3Args(Publisher
):
170 def setup_option_parser(self
, usage
=None, description
=None,
171 settings_spec
=None, config_section
=None,
174 if not settings_spec
:
175 settings_spec
= SettingsSpec()
176 settings_spec
.config_section
= config_section
177 parts
= config_section
.split()
178 if len(parts
) > 1 and parts
[-1] == 'application':
179 settings_spec
.config_section_dependencies
= ['applications']
180 #@@@ Add self.source & self.destination to components in future?
181 option_parser
= OptionParser3Args(
182 components
=(self
.parser
, self
.reader
, self
.writer
, settings_spec
),
183 defaults
=defaults
, read_config_files
=1,
184 usage
=usage
, description
=description
)
187 class OptionParser3Args(OptionParser
):
189 def check_values(self
, values
, args
):
190 """Store positional arguments as runtime settings."""
191 # Complete a possible switch
192 switchOptions(values
, bothOption
)
193 values
._old
_source
, values
._new
_source
, values
._destination
= self
.check_args(args
)
194 make_paths_absolute(values
.__dict
__, self
.relative_path_settings
,
196 values
._config
_files
= self
.config_files
199 def check_args(self
, args
):
200 old_source
= new_source
= destination
= None
202 self
.error('At least 1 argument required.')
204 old_source
= args
.pop(0)
205 if old_source
== '-': # means stdin
208 new_source
= args
.pop(0)
209 if new_source
== '-': # means stdin
212 destination
= args
.pop(0)
213 if destination
== '-': # means stdout
216 self
.error('Maximum 3 arguments allowed.')
217 if old_source
is None and new_source
is None:
218 self
.error('Old and new source may not both use stdin.')
219 if (old_source
and old_source
== destination
220 or new_source
and new_source
== destination
):
221 self
.error('Do not specify the same file for both source and '
222 'destination. It will clobber the source file.')
223 return old_source
, new_source
, destination
225 ###############################################################################
226 ###############################################################################
229 class Opcode(object):
230 """Encapsulates opcodes as returned by `TreeMatcher.get_opcodes()`"""
def __init__(self, opcodeTuple):
    """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`.

    The fields are copied into a list stored as `self._tuple`.
    """
    fields = [ field for field in opcodeTuple ]
    self._tuple = fields
def getCommand(self):
    """Return the command, which is the first field of the opcode."""
    command = self._tuple[0]
    return command
def getOldRange(self):
    """Return the range pertaining to an old list as a 2-tuple built
    from the second and third field of the opcode."""
    begin = self._tuple[1]
    end = self._tuple[2]
    return ( begin, end, )
def getNewRange(self):
    """Return the range pertaining to a new list as a 2-tuple built
    from the fourth and fifth field of the opcode."""
    begin = self._tuple[3]
    end = self._tuple[4]
    return ( begin, end, )
256 def getSubOpcodes(self
):
257 """Return the sub-opcodes in case of `command` == 'descend' or
259 if self
._tuple
[0] != self
.Descend
:
261 return self
._tuple
[5]
def resolveOpcode(self, oldList, newList):
    """Resolve this opcode against `oldList` and `newList`.

    Returns a tuple of four elements:

    * the command, same as `self.getCommand()`,
    * the elements of `oldList` affected by the opcode,
    * the elements of `newList` affected by the opcode,
    * the sub-opcodes, same as `self.getSubOpcodes()`.
    """
    oldBounds = self.getOldRange()
    newBounds = self.getNewRange()
    command = self.getCommand()
    oldFrom = oldBounds[0]
    oldTo = oldBounds[1]
    oldAffected = oldList[oldFrom:oldTo]
    newFrom = newBounds[0]
    newTo = newBounds[1]
    newAffected = newList[newFrom:newTo]
    subOpcodes = self.getSubOpcodes()
    return ( command, oldAffected, newAffected, subOpcodes, )
def setSubOpcodes(self, opcodes):
    """Replace the sub-opcodes (the sixth field) by the list `opcodes`.

    Raises `TypeError` unless the command of this opcode equals
    `self.Descend`.
    """
    command = self._tuple[0]
    if command == self.Descend:
        self._tuple[5] = opcodes
        return
    raise TypeError("Can not set subopcodes of a %r opcode" % ( command, ))
291 def setCommand(self
, command
):
292 """Set a new command adapting subopcodes."""
293 if self
._tuple
[0] == command
:
295 self
._tuple
[0] = command
296 if command
== self
.Descend
:
299 self
._tuple
= self
._tuple
[0:5]
def setOldRange(self, range):
    """Set the range pertaining to an old list. `range` must unpack
    into exactly two values, stored as second and third field."""
    begin, end = range
    self._tuple[1] = begin
    self._tuple[2] = end
def setNewRange(self, range):
    """Set the range pertaining to a new list. `range` must unpack
    into exactly two values, stored as fourth and fifth field."""
    begin, end = range
    self._tuple[3] = begin
    self._tuple[4] = end
310 """Return the opcode as a tuple."""
311 return tuple(self
._tuple
)
313 ###############################################################################
314 ###############################################################################
315 # Additional docutils stuff
317 ###############################################################################
320 class White(nodes
.Text
):
321 """A piece of text containing only whitespace."""
325 """A regular expression matching strings for this class and returning
326 them as the first match."""
327 # TODO Could be subject to an option
330 class Word(nodes
.Text
):
331 """A piece of text containing exactly one word."""
337 """Splits text and returns a sequence of `Word` and `White`
338 objects. Returns an empty sequence for an empty `text`."""
340 subs
= re
.split(White
.re
, text
.astext())
344 elif re
.match(White
.re
, subs
[0]):
345 ( current
, next
, ) = ( White
, Word
, )
347 ( current
, next
, ) = ( Word
, White
, )
349 result
.append(current(sub
))
350 ( current
, next
, ) = ( next
, current
, )
353 ###############################################################################
356 class Text2Words(Transform
):
357 """Transforms a `Text` node into a sequence of `Word`/`White`."""
360 self
.document
.walk(Text2WordsVisitor(self
.document
))
362 class Text2WordsVisitor(nodes
.SparseNodeVisitor
):
364 def visit_Text(self
, text
):
365 words
= Word
.splitText(text
)
368 words
= [ White(''), ]
369 text
.parent
.replace(text
, words
)
371 class Words2Text(Transform
):
372 """Transforms a sequence of `Word`/`White` into a `Text` node."""
375 self
.document
.walk(Words2TextVisitor(self
.document
))
377 class Words2TextVisitor(nodes
.SparseNodeVisitor
):
379 def visit_Text(self
, text
):
381 # Find this node and the first node of the sequence it belongs to
383 for i
in range(len(parent
)):
384 if not isinstance(parent
[i
], nodes
.Text
):
388 # ``parent.index(text)`` uses value equality - can not be
389 # used here to find `text`
390 if id(parent
[i
]) == id(text
):
394 raise IndexError("Can not find %r in its parent" % ( text
, ))
396 if (len(parent
) > end
397 and isinstance(parent
[end
], nodes
.Text
)):
398 # The visitor processes following children even if they are
399 # deleted - so work for last node of a sequence
402 texts
= nodes
.Text(reduce(lambda s
, node
: s
+ node
.astext(),
403 parent
[first
:end
], ""))
404 parent
[first
:end
] = ( texts
, )
406 visit_White
= visit_Text
408 visit_Word
= visit_Text
410 ###############################################################################
411 ###############################################################################
414 class DocutilsDispatcher(HashableNodeImpl
):
415 """Implements hashable for a docutils `Node` and supports construction."""
def __init__(self, reporter):
    """Initialize the dispatcher.

    The base class is initialized with `nodes.Node`. `reporter` is
    stored as `self.reporter`; `dispatchClass` writes its debug output
    to it.
    """
    # Name the class explicitly: ``super(self.__class__, self)`` looks
    # up the same class again when called on an instance of a subclass
    # and then recurses without end.
    super(DocutilsDispatcher, self).__init__(nodes.Node)
    self.reporter = reporter
423 def dispatchClass(self
, function
, node
, *args
):
424 """Dispatch a call of type `function` for the class of `node` using
425 arguments `node` and `args`. Default is to dispatch for imaginary class
427 pat
= "%s_%%s" % ( function
, )
429 name
= pat
% ( node
.__class
__.__name
__, )
430 method
= getattr(self
, name
)
431 except AttributeError:
432 name
= pat
% ( 'UNKNOWN', )
433 method
= getattr(self
, name
)
434 self
.reporter
.debug("*** %s(%s)"
435 % ( name
, ", ".join([ arg
.__class
__.__name
__
437 in ( node
, ) + args
]), ))
438 for arg
in ( node
, ) + args
:
440 self
.reporter
.debug(" > %s" % ( arg
, ))
441 except UnicodeEncodeError:
442 self
.reporter
.debug(" > CANNOT OUTPUT ARGUMENT OF TYPE %s"
444 result
= method(node
, *args
)
446 self
.reporter
.debug(" < %s" % ( result
, ))
447 except UnicodeEncodeError:
448 self
.reporter
.debug(" < CANNOT OUTPUT RESULT OF TYPE %s"
452 ###########################################################################
453 ###########################################################################
454 # Implementation of abstract methods for `HashableNodeImpl`
def rootHash(self, node):
    """Return a hash for `node` as a root only.

    The call is forwarded through `dispatchClass` with the function
    name 'rootHash'.
    """
    dispatch = self.dispatchClass
    return dispatch('rootHash', node)
def rootHash_UNKNOWN(self, node):
    """Default root hash: the hash of the class of `node`."""
    nodeClass = node.__class__
    return hash(nodeClass)
464 def rootEq(self
, node
, other
):
465 """Returns root equality of `node` and an `other` node. ``True`` if
466 the two nodes as roots are equal without considering their
467 children. This should be true if one node can be replaced by
468 the other and all changes can be represented without changing
469 the node itself. Subclasses must override this."""
470 # Only nodes of the same class can be equal - this assumption
471 # is used in many places
472 if node
.__class
__ != other
.__class
__:
474 return self
.dispatchClass('rootEq', node
, other
)
476 def rootEq_UNKNOWN(self
, node
, other
):
477 # Unless we know better two roots of the same type are considered equal
def childHash(self, node):
    """Return a hash for `node` as a child.

    The call is forwarded through `dispatchClass` with the function
    name 'childHash'.
    """
    dispatch = self.dispatchClass
    return dispatch('childHash', node)
def childHash_UNKNOWN(self, node):
    """Default child hash: whatever `self.childrenHash` returns for
    `node`, i.e. a child is hashed by its children."""
    hashOfChildren = self.childrenHash(node)
    return hashOfChildren
489 def childEq(self
, node
, other
):
490 """Returns equality of `node` and an `other` node as children.
491 ``True`` if the child features of the two nodes are equal
492 without considering the root. Subclasses must override
494 # Only nodes of the same class can be equal - this assumption
495 # is used in many places
496 if node
.__class
__ != other
.__class
__:
498 return self
.dispatchClass('childEq', node
, other
)
def childEq_UNKNOWN(self, node, other):
    """Default child equality: whatever `self.childrenEq` returns for
    `node` and `other`, i.e. children are compared by their children."""
    childrenAreEqual = self.childrenEq(node, other)
    return childrenAreEqual
def getChildren(self, node):
    """Return the children of `node` as a list.

    The call is forwarded through `dispatchClass` with the function
    name 'getChildren'.
    """
    dispatch = self.dispatchClass
    return dispatch('getChildren', node)
509 def getChildren_UNKNOWN(self
, node
):
512 ###########################################################################
513 ###########################################################################
516 # TODO The resulting class names should be configurable
517 NewDelete
= 'removed'
519 NewReplaced
= 'replaced'
520 NewReplacement
= 'replacement'
def copyRoot(self, node):
    """Copy `node` as root and return the copy.

    The call is forwarded through `dispatchClass` with the function
    name 'copyRoot'.
    """
    dispatch = self.dispatchClass
    return dispatch('copyRoot', node)
526 def copyRoot_UNKNOWN(self
, node
):
def addChild(self, root, child):
    """Add `child` to `root`.

    The call is forwarded through `dispatchClass` with the function
    name 'addChild'; dispatching happens on `root`.
    """
    dispatch = self.dispatchClass
    return dispatch('addChild', root, child)
533 def addChild_UNKNOWN(self
, root
, child
):
def copyChild(self, node, newType):
    """Copy `node` as child and return the copy.

    `newType` is ``None`` for an unchanged child or the change type.
    The call is forwarded through `dispatchClass` with the function
    name 'copyChild'.
    """
    dispatch = self.dispatchClass
    return dispatch('copyChild', node, newType)
def copyChild_UNKNOWN(self, node, newType):
    """Default child copy: a deep copy of `node` passed through
    `self.setNewType` together with `newType`."""
    duplicate = node.deepcopy()
    return self.setNewType(duplicate, newType)
def copyChildren(self, head, tail, root, newType):
    """Copy the nodes [ `head` ] + `tail` under `root` and return the
    resulting range of new nodes. All nodes in `tail` are of the same
    class as `head`. The new nodes are created appropriate to type
    `newType`.

    The call is forwarded through `dispatchClass` with the function
    name 'copyChildren'; dispatching happens on `head`.
    """
    dispatch = self.dispatchClass
    return dispatch('copyChildren', head, tail, root, newType)
def copyChildren_UNKNOWN(self, head, tail, root, newType):
    """Default copy of a range: copy `head` and every node in `tail`
    one by one with `self.copyChild` and return the copies as a list.

    `tail` may be any iterable of nodes. `root` is not used here.
    """
    # ``list(tail)`` keeps this working for tuples and other
    # iterables; ``[ head, ] + tail`` raises TypeError for them.
    sources = [ head, ] + list(tail)
    return [ self.copyChild(child, newType)
             for child in sources ]
554 def copyRange(self
, root
, children
, newType
):
555 """Return a range of new nodes copied from `children` under `root`.
556 Nodes are created appropriate to type `newType`."""
559 while begin
< len(children
):
560 first
= children
[begin
]
562 while end
< len(children
):
564 if not(first
.__class
__ == last
.__class
__
565 or (isinstance(first
, nodes
.Text
)
566 and isinstance(last
, nodes
.Text
))):
569 result
.extend(self
.copyChildren(first
, children
[begin
+ 1:end
],
574 def mergeChildren(self
, diffRoot
, oldRoot
, newRoot
,
575 command
, oldRange
, newRange
):
576 """Add children to `diffRoot` merging children `oldRange` / `newRange`
577 of `oldRoot` / `newRoot` by `command`."""
578 if command
== Opcode
.Equal
:
580 self
.addChild(diffRoot
, self
.copyChild(old
, None))
581 elif command
== Opcode
.Insert
or command
== Opcode
.Delete
:
582 if command
== Opcode
.Insert
:
585 newType
= self
.NewInsert
589 newType
= self
.NewDelete
590 for newChild
in self
.copyRange(srcRoot
, srcRange
, newType
):
591 self
.addChild(diffRoot
, newChild
)
592 elif command
== Opcode
.Replace
:
593 # TODO Replacement doubles elements. This needs to be
594 # reflected properly in the @ids. If the @ids don't change
595 # there need to be unique @ids for replaced elements. This
596 # needs also to be reflected in referring @refid and
598 for newChild
in self
.copyRange(oldRoot
, oldRange
,
600 self
.addChild(diffRoot
, newChild
)
601 for newChild
in self
.copyRange(newRoot
, newRange
,
602 self
.NewReplacement
):
603 self
.addChild(diffRoot
, newChild
)
605 raise TypeError("Unhandled command %r" % ( command
, ))
607 ###########################################################################
608 ###########################################################################
611 def setNewType(self
, node
, newType
):
612 """Set a class on `node` for `newType` if set. Returns `node`."""
614 node
['classes'].append("change-%s" % ( newType
, ))
617 ###########################################################################
618 ###########################################################################
619 # Real comparison and merging
621 # The idea is like this: Each node has attributes which need to be
622 # compared as root and it has attributes which need to be compared
623 # as child. This is different for every node type.
625 # Similarly each node type may need special methods for cloning
628 ###########################################################################
629 # Text / Word / White
def rootHash_Text(self, node):
    """Hash a text node by the string its `astext()` returns."""
    content = node.astext()
    return hash(content)
634 rootHash_Word
= rootHash_Text
636 def rootHash_White(self
, node
):
637 # Whitespace compares all equal
def rootEq_Text(self, node, other):
    """Text nodes are equal if their `astext()` results are equal."""
    nodeContent = node.astext()
    otherContent = other.astext()
    return nodeContent == otherContent
643 rootEq_Word
= rootEq_Text
645 def rootEq_White(self
, node
, other
):
646 # TODO Must behave different for places where whitespace
647 # differences are relevant
650 # Text behaves the same as root or child
652 childHash_Text
= rootHash_Text
653 childHash_Word
= rootHash_Word
654 childHash_White
= rootHash_White
656 childEq_Text
= rootEq_Text
657 childEq_Word
= rootEq_Word
658 childEq_White
= rootEq_White
660 def copyChildren_Text(self
, head
, tail
, root
, newType
):
661 if not tail
and isinstance(head
, nodes
.Text
) and not head
.astext():
662 # Do not create empty inlines
664 inline
= nodes
.inline()
665 self
.setNewType(inline
, newType
)
666 inline
.extend([ head
, ] + tail
)
669 # Sequences of Text are treated together
670 copyChildren_Word
= copyChildren_Text
671 copyChildren_White
= copyChildren_Text
673 ###########################################################################
676 def getSectionName(self
, node
):
677 """Return the best name for `node`."""
679 return node
['dupnames'][0]
681 return node
['names'][0]
683 return node
['ids'][0]
684 return '' # No idea...
686 def rootEq_section(self
, node
, other
):
687 """Compare sections by their names or normally."""
688 if node
.document
.settings
.compare_sections_by_names
:
689 return self
.getSectionName(node
) == self
.getSectionName(other
)
692 ###########################################################################
693 # For some elements their attributes need to be considered to
696 def attributeEq(self
, node
, other
, attribute
):
697 if (attribute
in node
) != (attribute
in other
):
699 if not attribute
in node
:
701 return node
[attribute
] == other
[attribute
]
703 ###########################################################################
def rootEq_reference(self, node, other):
    """Compare references as roots by `attributeEq` on 'refuri'."""
    attribute = 'refuri'
    return self.attributeEq(node, other, attribute)
709 ###########################################################################
def rootEq_target(self, node, other):
    """Compare targets as roots by `attributeEq` on 'refuri'."""
    attribute = 'refuri'
    return self.attributeEq(node, other, attribute)
715 ###########################################################################
718 # TODO This is typically a minor change and should be requested by
def attributeEq_bullet_list(self, node, other):
    """Compare the 'bullet' attribute of two bullet lists using
    `attributeEq`."""
    attribute = 'bullet'
    return self.attributeEq(node, other, attribute)
def rootEq_bullet_list(self, node, other):
    """Bullet lists are equal as roots if `attributeEq_bullet_list`
    says so."""
    attributesMatch = self.attributeEq_bullet_list(node, other)
    return attributesMatch
def childEq_bullet_list(self, node, other):
    """Bullet lists are equal as children if `attributeEq_bullet_list`
    holds and `childrenEq` holds as well. `childrenEq` is only
    consulted when the attribute comparison succeeded."""
    attributesMatch = self.attributeEq_bullet_list(node, other)
    if not attributesMatch:
        return attributesMatch
    return self.childrenEq(node, other)
731 ###########################################################################
734 # TODO This is typically a minor change and should be requested by
def attributeEq_enumerated_list(self, node, other):
    """Compare the attributes 'enumtype', 'prefix', 'suffix' and
    'start' of two enumerated lists using `attributeEq`.

    The attributes are checked in that order and checking stops at the
    first one that differs.
    """
    outcome = True
    for attribute in ( 'enumtype', 'prefix', 'suffix', 'start', ):
        outcome = self.attributeEq(node, other, attribute)
        if not outcome:
            break
    return outcome
def rootEq_enumerated_list(self, node, other):
    """Enumerated lists are equal as roots if
    `attributeEq_enumerated_list` says so."""
    attributesMatch = self.attributeEq_enumerated_list(node, other)
    return attributesMatch
def childEq_enumerated_list(self, node, other):
    """Enumerated lists are equal as children if
    `attributeEq_enumerated_list` holds and `childrenEq` holds as
    well. `childrenEq` is only consulted when the attribute comparison
    succeeded."""
    attributesMatch = self.attributeEq_enumerated_list(node, other)
    if not attributesMatch:
        return attributesMatch
    return self.childrenEq(node, other)
750 ###########################################################################
753 def rootEq_image(self
, node
, other
):
754 if node
.__class
__ != other
.__class
__:
756 return self
.attributeEq(node
, other
, 'uri')
758 ###########################################################################
759 # Some elements may contain only #PCDATA. They need to propagate
760 # changes in their children up to the element itself.
762 def rootEqWithChildren(self
, node
, other
):
763 if node
.__class
__ != other
.__class
__:
765 return self
.childrenEq(node
, other
)
767 ###########################################################################
770 rootEq_comment
= rootEqWithChildren
772 ###########################################################################
775 rootEq_literal
= rootEqWithChildren
777 ###########################################################################
780 rootEq_option_string
= rootEqWithChildren
782 ###########################################################################
785 # TODO This is typically a minor change and should be requested by
788 rootEq_label
= rootEqWithChildren
790 ###########################################################################
793 # TODO This is typically a minor change and should be requested by
796 rootEq_footnote_reference
= rootEqWithChildren
798 ###########################################################################
801 # TODO This is typically a minor change and should be requested by
804 rootEq_citation_reference
= rootEqWithChildren
806 ###########################################################################
807 # For some elements their attributes need to be considered to
808 # detect changes *and* they may contain only #PCDATA.
810 ###########################################################################
813 # TODO This is typically a minor change and should be requested by
def attributeEq_option_argument(self, node, other):
    """Compare the 'delimiter' attribute of two option arguments using
    `attributeEq`."""
    attribute = 'delimiter'
    return self.attributeEq(node, other, attribute)
def rootEq_option_argument(self, node, other):
    """Option arguments are equal as roots if
    `attributeEq_option_argument` holds and `rootEqWithChildren` holds
    as well. `rootEqWithChildren` is only consulted when the attribute
    comparison succeeded."""
    attributesMatch = self.attributeEq_option_argument(node, other)
    if not attributesMatch:
        return attributesMatch
    return self.rootEqWithChildren(node, other)
def childEq_option_argument(self, node, other):
    """Option arguments are equal as children if
    `attributeEq_option_argument` holds and `childrenEq` holds as
    well. `childrenEq` is only consulted when the attribute comparison
    succeeded."""
    attributesMatch = self.attributeEq_option_argument(node, other)
    if not attributesMatch:
        return attributesMatch
    return self.childrenEq(node, other)
827 ###########################################################################
828 # A change in certain elements must propagate the change up since
829 # they may occur only once. Must be done by parents.
# Checks whether `node` and `other` both have a child of type
# `childClass` and whether the first of those are equal.
833 def rootEqWithChild(self
, node
, other
, childClass
):
834 if node
.__class
__ != other
.__class
__:
838 for nodeChild
in self
.getChildren(node
):
839 if isinstance(nodeChild
, childClass
):
840 nodeFound
= nodeChild
844 for otherChild
in self
.getChildren(other
):
845 if isinstance(otherChild
, childClass
):
846 otherFound
= otherChild
849 if nodeFound
is None or otherFound
is None:
852 return self
.childEq(nodeFound
, otherFound
)
854 ###########################################################################
def rootEq_footnote(self, node, other):
    """Compare footnotes as roots via `rootEqWithChild` with
    `nodes.label` as the child class."""
    return self.rootEqWithChild(node, other, childClass=nodes.label)
860 ###########################################################################
def rootEq_citation(self, node, other):
    """Compare citations as roots via `rootEqWithChild` with
    `nodes.label` as the child class."""
    return self.rootEqWithChild(node, other, childClass=nodes.label)
866 ###########################################################################
def rootEq_option(self, node, other):
    """Compare options as roots via `rootEqWithChild` with
    `nodes.option_string` as the child class."""
    return self.rootEqWithChild(node, other,
                                childClass=nodes.option_string)
872 ###########################################################################
873 # Some attributes of some elements depend on their concrete parents.
876 def copyRoot_tgroup(self
, node
):
878 copy
['origcols'] = copy
['cols']
882 def addChild_tgroup(self
, root
, child
):
884 # This works only if for each column there is a `colspec`. Is
886 if isinstance(child
, nodes
.colspec
):
888 elif isinstance(child
, nodes
.tbody
):
889 # All columns seen - check the column widths
890 if root
['origcols'] != root
['cols']:
892 if isinstance(elem
, nodes
.colspec
):
893 elem
['colwidth'] = 100 / root
['cols']
896 # TODO Number of entries must change according to the (changed)
897 # number of columns; for added or removed columns entries of *one*
898 # column must be added / removed
900 ###############################################################################
901 ###############################################################################
904 def processCommandLine():
905 """Process command line and return a `Publisher`."""
906 # Determine writer here so options can be given normally
907 preWriter
= writerDefault
909 match
= re
.search(writerArgRE1
, arg
)
911 preWriter
= match
.group(1)
913 pub
= Publisher3Args()
914 pub
.set_reader('standalone', None, 'restructuredtext')
915 pub
.set_writer(preWriter
)
917 settingsSpec
= SettingsSpec()
918 settingsSpec
.settings_spec
= settings_spec
919 settingsSpec
.settings_defaults
= settings_defaults
920 pub
.process_command_line(usage
=usage
, description
=description
,
921 settings_spec
=settingsSpec
,
922 config_section
=config_section
)
923 if pub
.settings
.writer
!= preWriter
:
924 new_reporter('<cmdline>',
925 pub
.settings
).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
926 % ( preWriter
, pub
.settings
.writer
, ))
927 pub
.set_destination()
930 def readTree(pub
, sourceName
):
931 """Read and return a tree from `sourceName`."""
932 # Reset reader - just in case it keeps state from a previous invocation
933 pub
.set_reader('standalone', None, 'restructuredtext')
934 pub
.set_source(None, sourceName
)
936 pub
.document
= pub
.reader
.read(pub
.source
, pub
.parser
, pub
.settings
)
937 pub
.apply_transforms()
def doDiff(hashableNodeImpl, oldTree, newTree):
    """Compute the difference from `oldTree` to `newTree` using
    `hashableNodeImpl` and return the opcodes necessary to transform
    `oldTree` to `newTree`.

    The `TreeMatcher` additionally receives a predicate which is true
    for `White` nodes.
    """
    def isWhite(node):
        return isinstance(node, White)

    treeMatcher = TreeMatcher(hashableNodeImpl, oldTree, newTree, isWhite)
    opcodes = treeMatcher.get_opcodes()
    return opcodes
948 def buildDocument(oldTree
, newTree
, settings
):
949 """Returns a new document for the result of converting `oldTree` to
951 if (not isinstance(oldTree
, docutils
.nodes
.document
)
952 or not isinstance(newTree
, docutils
.nodes
.document
)):
953 raise TypeError("Roots of trees must be documents")
954 return new_document(u
"%s => %s"
955 % ( settings
._old
_source
, settings
._new
_source
, ),
958 def buildTree(dispatcher
, diffRoot
, opcodes
, oldRoot
, newRoot
):
959 """Adds a new sub-tree under `diffRoot` converting children of
960 `oldRoot` to `newRoot` using `opcodes`."""
961 oldChildren
= dispatcher
.getChildren(oldRoot
)
962 newChildren
= dispatcher
.getChildren(newRoot
)
963 for opcode
in opcodes
:
964 ( command
, oldRange
, newRange
,
965 subOpcodes
, ) = Opcode(opcode
).resolveOpcode(oldChildren
, newChildren
)
966 if command
== Opcode
.Descend
:
967 child
= dispatcher
.copyRoot(oldRange
[0])
968 dispatcher
.addChild(diffRoot
, child
)
969 buildTree(dispatcher
, child
,
970 subOpcodes
, oldRange
[0], newRange
[0])
972 dispatcher
.mergeChildren(diffRoot
, oldRoot
, newRoot
,
973 command
, oldRange
, newRange
)
975 # A replacement in certain elements must not be propagated up since
976 # they may occur only once and replacement would double them
977 replaceNotUp
= ( nodes
.title
, nodes
.subtitle
, nodes
.term
, nodes
.field_name
,
978 nodes
.attribution
, nodes
.caption
, # (%text.model)
979 nodes
.header
, nodes
.footer
, nodes
.definition
,
980 nodes
.field_body
, nodes
.description
, nodes
.legend
,
981 nodes
.entry
, # (%body.elements;+) or (%body.elements;*)
982 nodes
.decoration
, nodes
.docinfo
, nodes
.transition
,
983 nodes
.option_group
, nodes
.thead
,
984 nodes
.tbody
, # different content model
987 # A replacement in certain elements normally not subject to up
988 # propagation and contained in certain elements may propagate up if
989 # all their siblings are also replacements and would propagate up
990 replaceUpSiblings
= (
991 ( nodes
.title
, nodes
.section
, ),
992 ( nodes
.subtitle
, nodes
.section
, ),
993 ( nodes
.term
, nodes
.definition_list_item
, ),
994 ( nodes
.field_name
, nodes
.field
, ),
995 ( nodes
.attribution
, nodes
.block_quote
, ),
996 ( nodes
.caption
, nodes
.figure
, ),
997 ( nodes
.definition
, nodes
.definition_list_item
, ),
998 ( nodes
.field_body
, nodes
.field
, ),
999 ( nodes
.description
, nodes
.option_list_item
, ),
1000 ( nodes
.legend
, nodes
.figure
, ),
1001 ( nodes
.option_group
, nodes
.option_list_item
, ),
# TODO If much text is replaced in a text element the whole element
# should be replaced. This makes more sense to people than two large
# replaced/replacement blocks where the only equality is in words like
# "the". The exact meaning of "much" should be an option.
def cleanOpcodes(opcodes, dispatcher, oldList, newList):
    """Replace some nasty results in `opcodes` by cleaner versions.

    Opcodes create `newList` from `oldList`. The list `opcodes` is
    modified in place (recursively, including its sub-opcodes); nothing
    is returned.

    Three clean-ups are applied to every opcode having sub-opcodes:

    * The sub-opcodes are cleaned recursively.
    * Adjacent sub-opcodes of the same command other than
      `Opcode.Descend` are merged into one.
    * If exactly one sub-opcode remains its command may be propagated
      up to the opcode itself. A replacement in a node of one of the
      `replaceNotUp` classes is not propagated up unless the node
      matches `replaceUpSiblings` and all sibling opcodes are
      replacements, too.

    :Parameters:
      opcodes
        List of opcode tuples to clean.
      dispatcher
        Object used to obtain the children of a node through
        ``getChildren()``.
      oldList, newList
        The node sequences `opcodes` refers to.
    """
    # Indices in `opcodes` whose replacement may only propagate up if
    # every sibling is a replacement as well
    mightReplaceUpSiblings = []
    for i, rawOpcode in enumerate(opcodes):
        opcode = Opcode(rawOpcode)
        (_command, oldRange, newRange, subOpcodes,
         ) = opcode.resolveOpcode(oldList, newList)
        if not subOpcodes:
            # Nothing to clean for flat or empty opcodes
            continue

        oldNode = oldRange[0]
        newNode = newRange[0]
        cleanOpcodes(subOpcodes, dispatcher,
                     dispatcher.getChildren(oldNode),
                     dispatcher.getChildren(newNode))

        # Start at the second entry since each step looks back at the
        # previous one
        j = 1
        while j < len(subOpcodes):
            prev = Opcode(subOpcodes[j - 1])
            this = Opcode(subOpcodes[j])
            if (this.getCommand() != Opcode.Descend
                    and prev.getCommand() == this.getCommand()):
                # Merge adjacent opcodes of same type
                prevOld = prev.getOldRange()
                prevNew = prev.getNewRange()
                thisOld = this.getOldRange()
                thisNew = this.getNewRange()
                prev.setOldRange((prevOld[0], thisOld[1]))
                prev.setNewRange((prevNew[0], thisNew[1]))
                # The list shrinks by one so `j` already addresses the
                # next candidate and must not be advanced
                subOpcodes[j - 1:j + 1] = [prev.asTuple()]
            else:
                j += 1
        opcode.setSubOpcodes(subOpcodes)

        if len(subOpcodes) == 1:
            subOpcode = Opcode(subOpcodes[0])
            subCommand = subOpcode.getCommand()
            if subCommand == Opcode.Descend:
                propagateUp = False
            elif subCommand == Opcode.Replace:
                if isinstance(oldNode, replaceNotUp):
                    propagateUp = False
                    if any(isinstance(oldNode, cls)
                           and isinstance(oldNode.parent, parentCls)
                           for (cls, parentCls) in replaceUpSiblings):
                        # If for instance a section/title would
                        # propagate a replacement up the propagation
                        # needs to be done if all siblings would
                        # also propagate a replacement up
                        mightReplaceUpSiblings.append(i)
                else:
                    propagateUp = True
            else:
                propagateUp = True
            if propagateUp:
                # Propagate 1-element sequences up
                opcode.setCommand(subCommand)
        opcodes[i] = opcode.asTuple()

    if mightReplaceUpSiblings:
        # There are entries which might propagate a replace up if all
        # siblings could do as well
        if all(i in mightReplaceUpSiblings
               or Opcode(rawOpcode).getCommand() == Opcode.Replace
               for i, rawOpcode in enumerate(opcodes)):
            # All entries are replacements which may propagate up -
            # actually propagate elements which may propagate
            for i in mightReplaceUpSiblings:
                opcode = Opcode(opcodes[i])
                opcode.setCommand(Opcode.Replace)
                opcodes[i] = opcode.asTuple()
def createDiff(pub, oldTree, newTree):
    """Create and return a diff document from `oldTree` to `newTree`.

    :Parameters:
      pub
        Publisher whose ``settings`` are used for creating the reporter
        and the result document. ``settings.dump_rstdiff`` switches on
        debug dumps of both input trees and of the opcodes before and
        after cleaning.
      oldTree, newTree
        The document trees to compare.

    :Exceptions:
      `TypeError`
        If the diff does not consist of exactly one top level opcode or
        if that opcode is neither `Opcode.Descend` nor `Opcode.Equal`.
    """
    # The reporter is created while the debug setting is replaced by
    # `dump_rstdiff`; the real value is restored even if creating the
    # reporter fails
    realDebug = pub.settings.debug
    pub.settings.debug = pub.settings.dump_rstdiff
    try:
        reporter = new_reporter("RSTDIFF", pub.settings)
    finally:
        pub.settings.debug = realDebug
    dispatcher = DocutilsDispatcher(reporter)
    opcodes = doDiff(dispatcher, oldTree, newTree)

    if pub.settings.dump_rstdiff:
        reporter.debug(oldTree.asdom().toprettyxml())
        reporter.debug(newTree.asdom().toprettyxml())
        reporter.debug(pformat(opcodes, 2, 40, None))
        reporter.debug("^^^ Before cleaning vvv After cleaning")

    # Cleans `opcodes` in place
    cleanOpcodes(opcodes, dispatcher, [oldTree], [newTree])

    if pub.settings.dump_rstdiff:
        reporter.debug(pformat(opcodes, 2, 40, None))

    if len(opcodes) != 1:
        raise TypeError("Don't know how to merge documents which are not rootEq")
    opcode = Opcode(opcodes[0])
    if opcode.getCommand() not in (Opcode.Descend, Opcode.Equal):
        # TODO There should be a sense making message for this case
        # because this may happen due to up propagation of replacements
        raise TypeError("Don't know how to merge top level opcode of type %r"
                        % (opcode.getCommand(), ))

    diffDoc = buildDocument(oldTree, newTree, pub.settings)
    if opcode.getCommand() == Opcode.Equal:
        # TODO Equality should be reported somehow
        diffDoc.extend([child.deepcopy()
                        for child in newTree.children])
    else:
        buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree)
    return diffDoc
# Script entry point: read both input documents, diff them and write the
# annotated result
if __name__ == '__main__':
    # The publisher returned here supplies the settings, the writer and
    # the destination used below
    pub = processCommandLine()

    # The order matters: options are switched before each tree is read
    # and once more before diffing.
    # NOTE(review): presumably `useOptions` activates the option set
    # given for the old / new / both documents - confirm against its
    # definition
    useOptions(pub.settings, oldOption)
    oldTree = readTree(pub, pub.settings._old_source)
    useOptions(pub.settings, newOption)
    newTree = readTree(pub, pub.settings._new_source)
    useOptions(pub.settings, bothOption)

    # Both input trees are transformed before diffing and the result is
    # transformed back afterwards.
    # NOTE(review): judging by the names text is split into words for
    # the diff and joined again afterwards - confirm against the
    # transforms
    Text2Words(oldTree).apply()
    Text2Words(newTree).apply()

    diffDoc = createDiff(pub, oldTree, newTree)
    Words2Text(diffDoc).apply()

    pub.writer.write(diffDoc, pub.destination)
    pub.writer.assemble_parts()
1135 # TODO The CSS classes need to be set in a CSS stylesheet