From ad10597d0a674581f1451f2979337a87937efff2 Mon Sep 17 00:00:00 2001 From: xi Date: Sun, 19 Mar 2006 01:30:05 +0000 Subject: [PATCH] Loose indentation rules for the following cases: * In the flow context, indentation level is not checked. * For quoted scalars, indentation level is not checked. * Flow scalars are not required to have at least 1 space indentation. Zero indentation is acceptable. git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@117 18f92427-320e-0410-9341-c67f048884a3 --- lib/yaml/scanner.py | 88 +++++++++++++--------- ...cument-separator-in-quoted-scalar.error-message | 11 +++ ...lid-indentation-for-quoted-scalar.error-message | 2 - tests/data/sloppy-indentation.canonical | 18 +++++ tests/data/sloppy-indentation.data | 17 +++++ 5 files changed, 97 insertions(+), 39 deletions(-) create mode 100644 tests/data/document-separator-in-quoted-scalar.error-message delete mode 100644 tests/data/invalid-indentation-for-quoted-scalar.error-message create mode 100644 tests/data/sloppy-indentation.canonical create mode 100644 tests/data/sloppy-indentation.data diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index 2b4b35f..29de348 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -336,16 +336,21 @@ class Scanner: def unwind_indent(self, column): - # In flow context, tokens should respect indentation. - # Actually the condition should be `self.indent >= column` according to - # the spec. But this condition will prohibit intuitively correct - # constructions such as - # key : { - # } - if self.flow_level and self.indent > column: - raise ScannerError(None, None, - "invalid intendation or unclosed '[' or '{'", - self.reader.get_mark()) + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. 
But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.reader.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive than the specification requires. + if self.flow_level: + return # In block context, we may need to issue the BLOCK-END tokens. while self.indent > column: @@ -1119,17 +1124,19 @@ class Scanner: def scan_flow_scalar(self, double): # See the specification for details. + # Note that we loosen indentation rules for quoted scalars. Quoted + # scalars don't need to adhere to indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive than the specification requires. We only need to check + # that document separators are not included in scalars. chunks = [] start_mark = self.reader.get_mark() - indent = self.indent+1 - if indent == 0: - indent = 1 quote = self.reader.peek() self.reader.forward() - chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) while self.reader.peek() != quote: - chunks.extend(self.scan_flow_scalar_spaces(double, indent, start_mark)) - chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_mark)) + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) self.reader.forward() end_mark = self.reader.get_mark() return ScalarToken(u''.join(chunks), False, start_mark, end_mark) @@ -1160,7 +1167,7 @@ class Scanner: u'U': 8, } - def scan_flow_scalar_non_spaces(self, double, indent, start_mark): + def scan_flow_scalar_non_spaces(self, double, start_mark): # See the specification for details. 
chunks = [] while True: @@ -1196,14 +1203,14 @@ class Scanner: self.reader.forward(length) elif ch in u'\r\n\x85\u2028\u2029': self.scan_line_break() - chunks.extend(self.scan_flow_scalar_breaks(double, indent, start_mark)) + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) else: raise ScannerError("while scanning a double-quoted scalar", start_mark, "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark()) else: return chunks - def scan_flow_scalar_spaces(self, double, indent, start_mark): + def scan_flow_scalar_spaces(self, double, start_mark): # See the specification for details. chunks = [] length = 0 @@ -1217,7 +1224,7 @@ class Scanner: "found unexpected end of stream", self.reader.get_mark()) elif ch in u'\r\n\x85\u2028\u2029': line_break = self.scan_line_break() - breaks = self.scan_flow_scalar_breaks(double, indent, start_mark) + breaks = self.scan_flow_scalar_breaks(double, start_mark) if line_break != u'\n': chunks.append(line_break) elif not breaks: @@ -1227,21 +1234,17 @@ class Scanner: chunks.append(whitespaces) return chunks - def scan_flow_scalar_breaks(self, double, indent, start_mark): + def scan_flow_scalar_breaks(self, double, start_mark): # See the specification for details. chunks = [] while True: - while self.reader.column < indent and self.reader.peek() == u' ': - self.reader.forward() - if self.reader.column < indent \ - and self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': - s = 's' - if indent == 1: - s = '' + # Instead of checking indentation, we check for document + # separators. 
+ prefix = self.reader.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': raise ScannerError("while scanning a quoted scalar", start_mark, - "expected %d space%s indentation, but found %r" - % (indent, s, self.reader.peek().encode('utf-8')), - self.reader.get_mark()) + "found unexpected document separator", self.reader.get_mark()) while self.reader.peek() in u' \t': self.reader.forward() if self.reader.peek() in u'\r\n\x85\u2028\u2029': @@ -1252,14 +1255,17 @@ class Scanner: def scan_plain(self): # See the specification for details. # We add an additional restriction for the flow context: - # plain scalars in the flow context cannot contain ':' and '?'. + # plain scalars in the flow context cannot contain ',', ':' and '?'. # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosened for the flow context. chunks = [] start_mark = self.reader.get_mark() end_mark = start_mark indent = self.indent+1 - if indent == 0: - indent = 1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 spaces = [] while True: length = 0 @@ -1280,13 +1286,13 @@ class Scanner: chunks.append(self.reader.prefix(length)) self.reader.forward(length) end_mark = self.reader.get_mark() - spaces = self.scan_plain_spaces(indent) + spaces = self.scan_plain_spaces(indent, start_mark) if not spaces or self.reader.peek() == u'#' \ - or self.reader.column < indent: + or (not self.flow_level and self.reader.column < indent): break return ScalarToken(u''.join(chunks), True, start_mark, end_mark) - def scan_plain_spaces(self, indent): + def scan_plain_spaces(self, indent, start_mark): # See the specification for details. # The specification is really confusing about tabs in plain scalars. # We just forbid them completely. Do not use tabs in YAML! 
@@ -1300,12 +1306,20 @@ class Scanner: if ch in u'\r\n\x85\u2028\u2029': line_break = self.scan_line_break() self.allow_simple_key = True + prefix = self.reader.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return breaks = [] while self.reader.peek() in u' \r\n\x85\u2028\u2029': if self.reader.peek() == ' ': self.reader.forward() else: breaks.append(self.scan_line_break()) + prefix = self.reader.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return if line_break != u'\n': chunks.append(line_break) elif not breaks: diff --git a/tests/data/document-separator-in-quoted-scalar.error-message b/tests/data/document-separator-in-quoted-scalar.error-message new file mode 100644 index 0000000..9eeb0d6 --- /dev/null +++ b/tests/data/document-separator-in-quoted-scalar.error-message @@ -0,0 +1,11 @@ +--- +"this --- is correct" +--- +"this +...is also +correct" +--- +"a quoted scalar +cannot contain +--- +document separators" diff --git a/tests/data/invalid-indentation-for-quoted-scalar.error-message b/tests/data/invalid-indentation-for-quoted-scalar.error-message deleted file mode 100644 index b885db3..0000000 --- a/tests/data/invalid-indentation-for-quoted-scalar.error-message +++ /dev/null @@ -1,2 +0,0 @@ -test: "foo -bar" diff --git a/tests/data/sloppy-indentation.canonical b/tests/data/sloppy-indentation.canonical new file mode 100644 index 0000000..0d312cc --- /dev/null +++ b/tests/data/sloppy-indentation.canonical @@ -0,0 +1,18 @@ +%YAML 1.1 +--- +!!map { + ? !!str "in the block context" + : !!map { + ? !!str "indentation should be kept" + : !!map { + ? !!str "but in the flow context" + : !!seq [ !!str "it may be violated" ] + } + } +} +--- !!str +"the parser does not require scalars to be indented with at least one space" +--- !!str +"the parser does not require scalars to be indented with at least one space" +--- !!map +{ ? 
!!str "foo": { ? !!str "bar" : "quoted scalars may not adhere indentation" } } diff --git a/tests/data/sloppy-indentation.data b/tests/data/sloppy-indentation.data new file mode 100644 index 0000000..2eb4f5a --- /dev/null +++ b/tests/data/sloppy-indentation.data @@ -0,0 +1,17 @@ +--- +in the block context: + indentation should be kept: { + but in the flow context: [ +it may be violated] +} +--- +the parser does not require scalars +to be indented with at least one space +... +--- +"the parser does not require scalars +to be indented with at least one space" +--- +foo: + bar: 'quoted scalars +may not adhere indentation' -- 2.11.4.GIT