From ad10597d0a674581f1451f2979337a87937efff2 Mon Sep 17 00:00:00 2001
From: xi <xi@18f92427-320e-0410-9341-c67f048884a3>
Date: Sun, 19 Mar 2006 01:30:05 +0000
Subject: [PATCH] Loosen indentation rules for the following cases:

 * In the flow context, indentation level is not checked.
 * For quoted scalars, indentation level is not checked.
 * Flow scalars are not required to have at least 1 space indentation. Zero indentation is acceptable.


git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@117 18f92427-320e-0410-9341-c67f048884a3
---
 lib/yaml/scanner.py                                | 88 +++++++++++++---------
 ...cument-separator-in-quoted-scalar.error-message | 11 +++
 ...lid-indentation-for-quoted-scalar.error-message |  2 -
 tests/data/sloppy-indentation.canonical            | 18 +++++
 tests/data/sloppy-indentation.data                 | 17 +++++
 5 files changed, 97 insertions(+), 39 deletions(-)
 create mode 100644 tests/data/document-separator-in-quoted-scalar.error-message
 delete mode 100644 tests/data/invalid-indentation-for-quoted-scalar.error-message
 create mode 100644 tests/data/sloppy-indentation.canonical
 create mode 100644 tests/data/sloppy-indentation.data

diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
index 2b4b35f..29de348 100644
--- a/lib/yaml/scanner.py
+++ b/lib/yaml/scanner.py
@@ -336,16 +336,21 @@ class Scanner:
 
     def unwind_indent(self, column):
 
-        # In flow context, tokens should respect indentation.
-        # Actually the condition should be `self.indent >= column` according to
-        # the spec. But this condition will prohibit intuitively correct
-        # constructions such as
-        # key : {
-        # }
-        if self.flow_level and self.indent > column:
-            raise ScannerError(None, None,
-                    "invalid intendation or unclosed '[' or '{'",
-                    self.reader.get_mark())
+        ## In flow context, tokens should respect indentation.
+        ## Actually the condition should be `self.indent >= column` according to
+        ## the spec. But this condition will prohibit intuitively correct
+        ## constructions such as
+        ## key : {
+        ## }
+        #if self.flow_level and self.indent > column:
+        #    raise ScannerError(None, None,
+        #            "invalid intendation or unclosed '[' or '{'",
+        #            self.reader.get_mark())
+
+        # In the flow context, indentation is ignored. We make the scanner less
+        # restrictive than the specification requires.
+        if self.flow_level:
+            return
 
         # In block context, we may need to issue the BLOCK-END tokens.
         while self.indent > column:
@@ -1119,17 +1124,19 @@ class Scanner:
 
     def scan_flow_scalar(self, double):
         # See the specification for details.
+        # Note that we loosen indentation rules for quoted scalars. Quoted
+        # scalars don't need to adhere to indentation because " and ' clearly
+        # mark the beginning and the end of them. Therefore we are less
+        # restrictive than the specification requires. We only need to check
+        # that document separators are not included in scalars.
         chunks = []
         start_mark = self.reader.get_mark()
-        indent = self.indent+1
-        if indent == 0:
-            indent = 1
         quote = self.reader.peek()
         self.reader.forward()
-        chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_mark))
+        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
         while self.reader.peek() != quote:
-            chunks.extend(self.scan_flow_scalar_spaces(double, indent, start_mark))
-            chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_mark))
+            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
+            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
         self.reader.forward()
         end_mark = self.reader.get_mark()
         return ScalarToken(u''.join(chunks), False, start_mark, end_mark)
@@ -1160,7 +1167,7 @@ class Scanner:
         u'U':   8,
     }
 
-    def scan_flow_scalar_non_spaces(self, double, indent, start_mark):
+    def scan_flow_scalar_non_spaces(self, double, start_mark):
         # See the specification for details.
         chunks = []
         while True:
@@ -1196,14 +1203,14 @@ class Scanner:
                     self.reader.forward(length)
                 elif ch in u'\r\n\x85\u2028\u2029':
                     self.scan_line_break()
-                    chunks.extend(self.scan_flow_scalar_breaks(double, indent, start_mark))
+                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                 else:
                     raise ScannerError("while scanning a double-quoted scalar", start_mark,
                             "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark())
             else:
                 return chunks
 
-    def scan_flow_scalar_spaces(self, double, indent, start_mark):
+    def scan_flow_scalar_spaces(self, double, start_mark):
         # See the specification for details.
         chunks = []
         length = 0
@@ -1217,7 +1224,7 @@ class Scanner:
                     "found unexpected end of stream", self.reader.get_mark())
         elif ch in u'\r\n\x85\u2028\u2029':
             line_break = self.scan_line_break()
-            breaks = self.scan_flow_scalar_breaks(double, indent, start_mark)
+            breaks = self.scan_flow_scalar_breaks(double, start_mark)
             if line_break != u'\n':
                 chunks.append(line_break)
             elif not breaks:
@@ -1227,21 +1234,17 @@ class Scanner:
             chunks.append(whitespaces)
         return chunks
 
-    def scan_flow_scalar_breaks(self, double, indent, start_mark):
+    def scan_flow_scalar_breaks(self, double, start_mark):
         # See the specification for details.
         chunks = []
         while True:
-            while self.reader.column < indent and self.reader.peek() == u' ':
-                self.reader.forward()
-            if self.reader.column < indent  \
-                    and self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
-                s = 's'
-                if indent == 1:
-                    s = ''
+            # Instead of checking indentation, we check for document
+            # separators.
+            prefix = self.reader.prefix(3)
+            if (prefix == u'---' or prefix == u'...')   \
+                    and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
                 raise ScannerError("while scanning a quoted scalar", start_mark,
-                        "expected %d space%s indentation, but found %r"
-                        % (indent, s, self.reader.peek().encode('utf-8')),
-                        self.reader.get_mark())
+                        "found unexpected document separator", self.reader.get_mark())
             while self.reader.peek() in u' \t':
                 self.reader.forward()
             if self.reader.peek() in u'\r\n\x85\u2028\u2029':
@@ -1252,14 +1255,17 @@ class Scanner:
     def scan_plain(self):
         # See the specification for details.
         # We add an additional restriction for the flow context:
-        #   plain scalars in the flow context cannot contain ':' and '?'.
+        #   plain scalars in the flow context cannot contain ',', ':' and '?'.
         # We also keep track of the `allow_simple_key` flag here.
+        # Indentation rules are loosened for the flow context.
         chunks = []
         start_mark = self.reader.get_mark()
         end_mark = start_mark
         indent = self.indent+1
-        if indent == 0:
-            indent = 1
+        # We allow zero indentation for scalars, but then we need to check for
+        # document separators at the beginning of the line.
+        #if indent == 0:
+        #    indent = 1
         spaces = []
         while True:
             length = 0
@@ -1280,13 +1286,13 @@ class Scanner:
             chunks.append(self.reader.prefix(length))
             self.reader.forward(length)
             end_mark = self.reader.get_mark()
-            spaces = self.scan_plain_spaces(indent)
+            spaces = self.scan_plain_spaces(indent, start_mark)
             if not spaces or self.reader.peek() == u'#' \
-                    or self.reader.column < indent:
+                    or (not self.flow_level and self.reader.column < indent):
                 break
         return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
 
-    def scan_plain_spaces(self, indent):
+    def scan_plain_spaces(self, indent, start_mark):
         # See the specification for details.
         # The specification is really confusing about tabs in plain scalars.
         # We just forbid them completely. Do not use tabs in YAML!
@@ -1300,12 +1306,20 @@ class Scanner:
         if ch in u'\r\n\x85\u2028\u2029':
             line_break = self.scan_line_break()
             self.allow_simple_key = True
+            prefix = self.reader.prefix(3)
+            if (prefix == u'---' or prefix == u'...')   \
+                    and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                return
             breaks = []
             while self.reader.peek() in u' \r\n\x85\u2028\u2029':
                 if self.reader.peek() == ' ':
                     self.reader.forward()
                 else:
                     breaks.append(self.scan_line_break())
+                    prefix = self.reader.prefix(3)
+                    if (prefix == u'---' or prefix == u'...')   \
+                            and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                        return
             if line_break != u'\n':
                 chunks.append(line_break)
             elif not breaks:
diff --git a/tests/data/document-separator-in-quoted-scalar.error-message b/tests/data/document-separator-in-quoted-scalar.error-message
new file mode 100644
index 0000000..9eeb0d6
--- /dev/null
+++ b/tests/data/document-separator-in-quoted-scalar.error-message
@@ -0,0 +1,11 @@
+---
+"this --- is correct"
+---
+"this
+...is also
+correct"
+---
+"a quoted scalar
+cannot contain
+---
+document separators"
diff --git a/tests/data/invalid-indentation-for-quoted-scalar.error-message b/tests/data/invalid-indentation-for-quoted-scalar.error-message
deleted file mode 100644
index b885db3..0000000
--- a/tests/data/invalid-indentation-for-quoted-scalar.error-message
+++ /dev/null
@@ -1,2 +0,0 @@
-test: "foo
-bar"
diff --git a/tests/data/sloppy-indentation.canonical b/tests/data/sloppy-indentation.canonical
new file mode 100644
index 0000000..0d312cc
--- /dev/null
+++ b/tests/data/sloppy-indentation.canonical
@@ -0,0 +1,18 @@
+%YAML 1.1
+---
+!!map { 
+    ? !!str "in the block context"
+    : !!map {
+        ? !!str "indentation should be kept"
+        : !!map {
+            ? !!str "but in the flow context"
+            : !!seq [ !!str "it may be violated" ]
+        }
+    }
+}
+--- !!str
+"the parser does not require scalars to be indented with at least one space"
+--- !!str
+"the parser does not require scalars to be indented with at least one space"
+--- !!map
+{ ? !!str "foo": { ? !!str "bar" : "quoted scalars may not adhere indentation" } }
diff --git a/tests/data/sloppy-indentation.data b/tests/data/sloppy-indentation.data
new file mode 100644
index 0000000..2eb4f5a
--- /dev/null
+++ b/tests/data/sloppy-indentation.data
@@ -0,0 +1,17 @@
+---
+in the block context:
+    indentation should be kept: { 
+    but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+    bar: 'quoted scalars
+may not adhere indentation'
-- 
2.11.4.GIT