2 (** Splitting a string according to a character separator
4 This problem was suggested by Georges-Axel Jaloyan (Amazon Web Services),
5 during a talk at the first Dafny workshop (London, 2024).
7 The problem is stated as follows: split a string `s` according to a
8 character separator `sep`, as a sequence of substrings `[s0;s1;...sn]`
10 - the separator does not appear in any `si`;
11 - the concatenation `s0+sep+s1+sep+...+sep+sn` is equal to `s`.
13 Examples. Assuming that the separator character is '.', here are some
14 expected input/output:
16 +---------+-------------------+
18 +---------+-------------------+
21 | ".." | ["", "", ""] |
23 | "abc." | ["abc", ""] |
24 | ".abc" | ["", "abc"] |
25 | "a.bc" | ["a", "bc"] |
26 | "a..bc" | ["a", "", "bc"] |
27 +---------+-------------------+
29 Note that the output sequence is never empty.
31 Additionnally, we possibly set a limit to the total number of
32 substrings in the output. We do so with an integer parameter `limit`.
33 If `limit = -1`, there is no limit. Otherwise, `limit >= 1`.
34 When a limit is set, the last substring in the output may contain the
37 Examples. Assuming that the separator character is '.' and the limit
38 is 2, here are some expected input/output:
40 +-----------+----------------+
42 +-----------+----------------+
44 | "b.." | ["b", "."] |
45 | "b..." | ["b", ".."] |
46 | "ba.b.b." | ["ba", "b.b."] |
47 +-----------+----------------+
49 Author: Jean-Christophe FilliĆ¢tre (CNRS)
59 (** Characters and strings *)
63 val (=) (x y: char) : bool
64 ensures { result <-> x=y }
66 type string_ = seq char
70 (** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
71 for a non-empty sequence of strings *)
72 let rec function concat (ss: seq string_) (sep: char) : string_
73 requires { length ss >= 1 } variant { length ss }
74 = if length ss = 1 then ss[0]
75 else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1])
77 (** a mere shortcut, for convenience *)
78 predicate notin (sep: char) (s: string_)
83 let ([]) (s: string_) (i: int) : char
84 requires { [@expl:index in string bounds] 0 <= i < length s }
87 let split_string (s: string_) (sep: char) (limit: int) : (ss: seq string_)
88 requires { limit = -1 || limit >= 1 }
89 ensures { length ss >= 1 }
90 ensures { limit = -1 || length ss <= limit }
91 ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
92 ensures { length ss = limit || notin sep ss[length ss - 1] }
93 ensures { concat ss sep == s }
94 = if limit = 1 then return (singleton s);
99 invariant { 0 <= b <= i <= length s }
100 invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
101 invariant { forall j. b <= j < i -> s[j] <> sep }
102 invariant { limit = -1 || length ss < limit-1 }
103 invariant { concat (snoc ss s[b..]) sep == s }
104 variant { length s - i }
106 ss <- snoc ss s[b..i];
107 assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
108 if length ss = limit-1 then return snoc ss s[i+1..];
113 return snoc ss s[b..]
117 module SplitStringOCaml
128 (** Characters and strings *)
132 val (=) (x y: char) : bool
133 ensures { result <-> x=y }
135 type string_ = private {
136 ghost contents_: seq char;
138 meta coercion function contents_
142 (** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
143 for a non-empty sequence of strings *)
144 let rec ghost function concat (ss: seq string_) (sep: char) : seq char
145 requires { length ss >= 1 } variant { length ss }
146 = if length ss = 1 then ss[0].contents_
147 else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1].contents_)
149 (** a mere shortcut, for convenience *)
150 predicate notin (sep: char) (s: string_)
155 val length (s: string_) : int63
156 ensures { result = length s }
158 val sub (s: string_) (i j: int63) : string_
159 requires { [@expl:index in string bounds] 0 <= i <= j <= length s }
160 ensures { result = s[i..j] }
162 val ([]) (s: string_) (i: int63) : char
163 requires { [@expl:index in string bounds] 0 <= i < length s }
164 ensures { result = s[i] }
166 let split_string (s: string_) (sep: char) (limit: int63) : (ss: Q.t string_)
167 requires { limit = -1 || limit >= 1 }
168 ensures { length ss >= 1 }
169 ensures { limit = -1 || length ss <= limit }
170 ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
171 ensures { length ss = limit || notin sep ss[length ss - 1] }
172 ensures { concat ss sep == s }
173 = let ref ss = Q.create () in
174 if limit = (1:int63) then (Q.push s ss; return ss);
175 let ref i = 0: int63 in
176 let ref b = 0: int63 in
177 let ghost ref suffix = s in
178 while i < length s do
179 invariant { 0 <= b <= i <= length s }
180 invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
181 invariant { forall j. b <= j < i -> s[j] <> sep }
182 invariant { limit = -1 || length ss < limit-1 }
183 invariant { suffix = s[b..] }
184 invariant { concat (snoc ss suffix) sep == s }
185 variant { length s - i }
187 Q.push (sub s b i) ss;
188 assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
189 if to_int63 (Q.length ss) = limit - (1:int63) then
190 (Q.push (sub s (i+1) (length s)) ss; return ss);
192 suffix <- sub s b (length s)
196 Q.push (sub s b (length s)) ss;