examples/split_string.mlw

   1
   2 (** Splitting a string according to a character separator
   3
   4   This problem was suggested by Georges-Axel Jaloyan (Amazon Web Services),
   5   during a talk at the first Dafny workshop (London, 2024).
   6
   7   The problem is stated as follows: split a string `s` according to a
   8   character separator `sep`, as a sequence of substrings `[s0;s1;...sn]`
   9   where
  10   - the separator does not appear in any `si`;
  11   - the concatenation `s0+sep+s1+sep+...+sep+sn` is equal to `s`.
  12
  13   Examples. Assuming that the separator character is '.', here are some
  14   expected input/output:
  15
  16     +---------+-------------------+
  17     | input   | output            |
  18     +---------+-------------------+
  19     | ""      |  [""]             |
  20     | "."     |  ["", ""]         |
  21     | ".."    |  ["", "", ""]     |
  22     | "abc"   |  ["abc"]          |
  23     | "abc."  |  ["abc", ""]      |
  24     | ".abc"  |  ["", "abc"]      |
  25     | "a.bc"  |  ["a", "bc"]      |
  26     | "a..bc" |  ["a", "", "bc"]  |
  27     +---------+-------------------+
  28
  29   Note that the output sequence is never empty.
  30
  31   Additionnally, we possibly set a limit to the total number of
  32   substrings in the output. We do so with an integer parameter `limit`.
  33   If `limit = -1`, there is no limit. Otherwise, `limit >= 1`.
  34   When a limit is set, the last substring in the output may contain the
  35   separator.
  36
  37   Examples. Assuming that the separator character is '.' and the limit
  38   is 2, here are some expected input/output:
  39
  40     +-----------+----------------+
  41     | input     | output         |
  42     +-----------+----------------+
  43     | "a."      | ["a", ""]      |
  44     | "b.."     | ["b", "."]     |
  45     | "b..."    | ["b", ".."]    |
  46     | "ba.b.b." | ["ba", "b.b."] |
  47     +-----------+----------------+
  48
  49   Author: Jean-Christophe Filliâtre (CNRS)
  50 *)
  51
  52 module SplitString
  53
  54   use int.Int
  55   use seq.Seq
  56   use seq.FreeMonoid
  57   use seq.Mem
  58
  59   (** Characters and strings *)
  60
  61   type char
  62
  63   val (=) (x y: char) : bool
  64     ensures { result <-> x=y }
  65
  66   type string_ = seq char
  67
  68   (** Specification *)
  69
  70   (** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
  71       for a non-empty sequence of strings *)
  72   let rec function concat (ss: seq string_) (sep: char) : string_
  73     requires { length ss >= 1 } variant { length ss }
  74   = if length ss = 1 then ss[0]
  75     else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1])
  76
  77   (** a mere shortcut, for convenience *)
  78   predicate notin (sep: char) (s: string_)
  79   = not (mem sep s)
  80
  81   (** Code *)
  82
  83   let ([]) (s: string_) (i: int) : char
  84     requires { [@expl:index in string bounds] 0 <= i < length s }
  85   = get s i
  86
  87   let split_string (s: string_) (sep: char) (limit: int) : (ss: seq string_)
  88     requires { limit = -1 || limit >= 1 }
  89     ensures  { length ss >= 1 }
  90     ensures  { limit = -1 || length ss <= limit }
  91     ensures  { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
  92     ensures  { length ss = limit || notin sep ss[length ss - 1] }
  93     ensures  { concat ss sep == s }
  94   = if limit = 1 then return (singleton s);
  95     let ref ss = empty in
  96     let ref i = 0 in
  97     let ref b = 0 in
  98     while i < length s do
  99       invariant { 0 <= b <= i <= length s }
 100       invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
 101       invariant { forall j. b <= j < i -> s[j] <> sep }
 102       invariant { limit = -1 || length ss < limit-1 }
 103       invariant { concat (snoc ss s[b..]) sep == s }
 104       variant   { length s - i }
 105       if s[i] = sep then (
 106         ss <- snoc ss s[b..i];
 107         assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
 108         if length ss = limit-1 then return snoc ss s[i+1..];
 109         b <- i+1;
 110       );
 111       i <- i+1
 112     done;
 113     return snoc ss s[b..]
 114
 115 end
 116
 117 module SplitStringOCaml
 118
 119   use int.Int
 120   use seq.Seq
 121   use seq.FreeMonoid
 122   use seq.Mem
 123   use mach.int.Int63
 124   use mach.peano.Peano
 125   use mach.peano.Int63
 126   use queue.Queue as Q
 127
 128   (** Characters and strings *)
 129
 130   type char
 131
 132   val (=) (x y: char) : bool
 133     ensures { result <-> x=y }
 134
 135   type string_ = private {
 136     ghost contents_: seq char;
 137   }
 138   meta coercion function contents_
 139
 140   (** Specification *)
 141
 142   (** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
 143       for a non-empty sequence of strings *)
 144   let rec ghost function concat (ss: seq string_) (sep: char) : seq char
 145     requires { length ss >= 1 } variant { length ss }
 146   = if length ss = 1 then ss[0].contents_
 147     else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1].contents_)
 148
 149   (** a mere shortcut, for convenience *)
 150   predicate notin (sep: char) (s: string_)
 151   = not (mem sep s)
 152
 153   (** Code *)
 154
 155   val length (s: string_) : int63
 156     ensures  { result = length s }
 157
 158   val sub (s: string_) (i j: int63) : string_
 159     requires { [@expl:index in string bounds] 0 <= i <= j <= length s }
 160     ensures  { result = s[i..j] }
 161
 162   val ([]) (s: string_) (i: int63) : char
 163     requires { [@expl:index in string bounds] 0 <= i < length s }
 164     ensures  { result = s[i] }
 165
 166   let split_string (s: string_) (sep: char) (limit: int63) : (ss: Q.t string_)
 167     requires { limit = -1 || limit >= 1 }
 168     ensures  { length ss >= 1 }
 169     ensures  { limit = -1 || length ss <= limit }
 170     ensures  { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
 171     ensures  { length ss = limit || notin sep ss[length ss - 1] }
 172     ensures  { concat ss sep == s }
 173   = let ref ss = Q.create () in
 174     if limit = (1:int63) then (Q.push s ss; return ss);
 175     let ref i = 0: int63 in
 176     let ref b = 0: int63 in
 177     let ghost ref suffix = s in
 178     while i < length s do
 179       invariant { 0 <= b <= i <= length s }
 180       invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
 181       invariant { forall j. b <= j < i -> s[j] <> sep }
 182       invariant { limit = -1 || length ss < limit-1 }
 183       invariant { suffix = s[b..] }
 184       invariant { concat (snoc ss suffix) sep == s }
 185       variant   { length s - i }
 186       if s[i] = sep then (
 187         Q.push (sub s b i) ss;
 188         assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
 189         if to_int63 (Q.length ss) = limit - (1:int63) then
 190           (Q.push (sub s (i+1) (length s)) ss; return ss);
 191         b <- i+1;
 192         suffix <- sub s b (length s)
 193       );
 194       i <- i+1
 195     done;
 196     Q.push (sub s b (length s)) ss;
 197     return ss
 198
 199 end
 200