From 7521c3ee91a8c52cc152de6fa8b4c88b44fc3e33 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 20 Jul 2015 12:05:44 -0400 Subject: [PATCH] Document the torrc format as thoroughly as possible Closes ticket 2325 --- changes/ticket2325 | 7 ++ doc/include.am | 3 +- doc/torrc_format.txt | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/common/util.c | 33 +------- 4 files changed, 218 insertions(+), 32 deletions(-) create mode 100644 changes/ticket2325 create mode 100644 doc/torrc_format.txt diff --git a/changes/ticket2325 b/changes/ticket2325 new file mode 100644 index 0000000000..b96e514ae2 --- /dev/null +++ b/changes/ticket2325 @@ -0,0 +1,7 @@ + o Documentation: + - Include a specific and (hopefully) accurate documentation of the torrc + file's meta-format in doc/torrc_format.txt. This is mainly of + interest to people writing programs to parse or generate torrc files. + This document is not a commitment to long-term compatibility; + some aspects of the current format are a bit ridiculous. + Closes ticket 2325. diff --git a/doc/include.am b/doc/include.am index 783aa95c4e..af99501502 100644 --- a/doc/include.am +++ b/doc/include.am @@ -36,7 +36,8 @@ endif EXTRA_DIST+= doc/HACKING doc/asciidoc-helper.sh \ $(html_in) $(man_in) $(txt_in) \ - doc/state-contents.txt + doc/state-contents.txt \ + doc/torrc_format.txt docdir = @docdir@ diff --git a/doc/torrc_format.txt b/doc/torrc_format.txt new file mode 100644 index 0000000000..3ca187fd29 --- /dev/null +++ b/doc/torrc_format.txt @@ -0,0 +1,207 @@ + +This document specifies the current format and semantics of the torrc +file, as of July 2015. Note that we make no guarantee about the +stability of this format. If you write something designed for strict +compatibility with this document, please expect us to break it sooner or +later. + +Yes, some of this is quite stupid. My goal here is to explain what it +does, not what it should do. + + - Nick + + + +1. 
File Syntax + + # A file is interpreted as every Entry in the file, in order. + TorrcFile = Line* + + Line = BlankLine | Entry + + BlankLine = WS* OptComment LF + | WS* LF + + OptComment = + | Comment + + Comment = '#' NonLF* + + # Each Entry is interpreted as an optional "Magic" flag, a key, and a + # value. + Entry = SP* OptMagic Key (SP+ | '\\' NL SP*)+ Val LF + | SP* OptMagic Key (SP* | '\\' NL SP*)* LF + + OptMagic = + | "+" + | "/" + + # Keys are always specified verbatim. They are case insensitive. It + # is an error to specify a key that Tor does not recognize. + Key = KC* + + # Sadly, every kind of value is decoded differently... + Val = QuotedVal | ContinuedVal | PlainVal + + # The text of a PlainVal is the text of its PVBody portion, + # plus the optional trailing backslash. + PlainVal = PVBody* ('\\')? SP* OptComment + + # Note that a PVBody is copied verbatim. Backslashes are included + # verbatim. No changes are made. Note that a body may be empty. + PVBody = (VC | '\\' NonLF ) * + + # The text of a ContinuedVal is the text of each of its PVBody + # sub-elements, in order, concatenated. + ContinuedVal = CVal1 CVal2* CVal3 + + CVal1 = PVBody '\\' LF + CVal2 = PVBody ( '\\' LF | Comment LF ) + CVal3 = PVBody + + # The text of a QuotedVal is decoded as if it were a C string. + QuotedVal = DQ QVBody DQ SP* Comment + + QVBody = QC + | '\\' ( 'n' | 'r' | 't' | '\\' | '\'' | DQ | 'x' XD XD | OD OD? OD? ) + + XD = any hexadecimal digit + OD = any octal digit + + NonLF = Any character but '\n' + LF = '\n' | EOF + WS = ' ' | '\t' | '\r' + SP = ' ' | '\t' + DQ = '\"' + KC = Any character except an isspace() character or '#' + VC = Any character except '\\', '\n', or '#' + QC = Any character except '\n', '\\', or '\"' + +2. Mid-level Semantics + + + There are four configuration "domains", from lowest to highest priority: + + * Built-in defaults + * The "torrc_defaults" file, if any + * The "torrc" file, if any + * Arguments provided on the command line, if any. 
+ + Normally, values from high-priority domains override low-priority + domains, but see 'magic' below. + + Configuration keys fall into three categories: singletons, lists, and + groups. + + A singleton key may appear at most once in any domain. Its + corresponding value is equal to its value in the highest-priority + domain in which it occurs. + + A list key may appear any number of times in a domain. By default, + its corresponding value is equal to all of the values specified for + it in the highest-priority domain in which it appears. (See 'magic' + below). + + A group key may appear any number of times in a domain. It is + associated with a number of other keys in the same group. The + relative positions of entries with the keys in a single group + matter, but entries with keys not in the group may be freely + interspersed. By default, the group has a value equal to all keys + and values it contains, from the highest-priority domain in which any + of its keys occurs. + + Magic: + + If the '/' flag is specified for an entry, it sets the value for + that entry to an empty list. (This will cause a higher-priority + domain to clear a list from a lower-priority domain, without + actually adding any entries.) + + If the '+' flag is specified for the first entry in a list or a + group that appears in a given domain, that list or group is + appended to the list or group from the next-lowest-priority + domain, rather than replacing it. + +3. High-level semantics + + There are further constraints on the values that each entry can take. + These constraints are out-of-scope for this document. + +4. Examples + + (Indentation is removed in this section, to avoid confusion.) + +4.1. Syntax examples + +# Here is a simple configuration entry. The key is "Foo"; the value is +# "Bar" + +Foo Bar + +# A configuration entry can have spaces in its value, as below. 
Here the +# key is "Foo" and the value is "Bar Baz" +Foo Bar Baz + +# This configuration entry has spaces at the end of the line, but those +# spaces don't count, so the key and value are still "Foo" and "Bar Baz" +Foo Bar Baz + +# There can be an escaped newline between the value and the key. This +# is another way to say key="Hello", value="World" +Hello\ +World + +# In regular entries of this kind, you can have a comment at the end of +# the line, either with a space before it or not. Each of these is a +# different spelling of key="Hello", value="World" + +Hello World #today +Hello World#tomorrow + +# One way to encode a complex entry is as a C string. This is the same +# as key="Hello", value="World!" +Hello "World!" + +# The string can contain the usual set of C escapes. This entry has +# key="Hello", and value="\"World\"\nand\nuniverse" +Hello "\"World\"\nand\nuniverse" + +# And now we get to the more-or-less awful part. +# +# Multi-line entries ending with a backslash on each line aren't so +# bad. The backslash is removed, and everything else is included +# verbatim. So this entry has key="Hello" and value="Worldandfriends" +Hello\ +World\ +and\ +friends + +# Backslashes in the middle of a line are included as-is. The key of +# this one is "Too" and the value is "Many\\Backsl\ashes here" (with +# backslashes in that last string as-is) +Too \ +Many\\\ +Backsl\ashes \\ +here + +# And here's the really yucky part. If a comment appears in a multi-line +# entry, the entry is still able to continue on the next line, as in the +# following, where the key is "This" and the value is +# "entry and some are silly" +This entry \ + # has comments \ + and some \ + are # generally \ + silly + +# But you can also write that without the backslashes at the end of the +# comment lines. That is to say, this entry is exactly the same as the +# one above! 
+This entry \ + # has comments + and some \ + are # generally + silly + + + diff --git a/src/common/util.c b/src/common/util.c index a5b5488b0a..618e6a1b6a 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -2829,38 +2829,9 @@ parse_config_line_from_str_verbose(const char *line, char **key_out, char **value_out, const char **err_out) { - /* I believe the file format here is supposed to be: - FILE = (EMPTYLINE | LINE)* (EMPTYLASTLINE | LASTLINE)? - - EMPTYLASTLINE = SPACE* | COMMENT - EMPTYLINE = EMPTYLASTLINE NL - SPACE = ' ' | '\r' | '\t' - COMMENT = '#' NOT-NL* - NOT-NL = Any character except '\n' - NL = '\n' - - LASTLINE = SPACE* KEY SPACE* VALUES - LINE = LASTLINE NL - KEY = KEYCHAR+ - KEYCHAR = Any character except ' ', '\r', '\n', '\t', '#', "\" - - VALUES = QUOTEDVALUE | NORMALVALUE - QUOTEDVALUE = QUOTE QVCHAR* QUOTE EOLSPACE? - QUOTE = '"' - QVCHAR = KEYCHAR | ESC ('n' | 't' | 'r' | '"' | ESC |'\'' | OCTAL | HEX) - ESC = "\\" - OCTAL = ODIGIT (ODIGIT ODIGIT?)? - HEX = ('x' | 'X') HEXDIGIT HEXDIGIT - ODIGIT = '0' .. '7' - HEXDIGIT = '0'..'9' | 'a' .. 'f' | 'A' .. 'F' - EOLSPACE = SPACE* COMMENT? - - NORMALVALUE = (VALCHAR | ESC ESC_IGNORE | CONTINUATION)* EOLSPACE? - VALCHAR = Any character except ESC, '#', and '\n' - ESC_IGNORE = Any character except '#' or '\n' - CONTINUATION = ESC NL ( COMMENT NL )* + /* + See torrc_format.txt for a description of the (silly) format this parses. */ - const char *key, *val, *cp; int continuation = 0; -- 2.11.4.GIT