main.myr

   1 use std
   2
   3 const main = {
   4         match std.fslurp(0)
   5         | `std.Err e:
   6                 std.fput(2, "Error: {}\n", e)
   7         | `std.Ok s:
   8                 regurgitate(digest(ingest(s)), 72)
   9         ;;
  10 }
  11
  12 type paragraph = struct
  13         first_line_prefix : char[:]
  14         gen_line_prefix : char[:]
  15         content : char[:]
  16         empty : bool
  17         merged : bool
  18 ;;
  19
  20 type state = union
  21         `Reading_prefix
  22         `Reading_line
  23         `Reading_line_last_was_ws
  24 ;;
  25
  26 /*
  27    The algorithm:
  28
  29    Read each line, and strip off the prefix (whitespace and markers like
  30    -/•/·/#/45:/_1_/) from the content. Each line becomes its own
  31    paragraph. If the content is all whitespace, the paragraph is
  32    considered empty.
  33
  34    Now collapse all paragraphs:
  35
  36     - Empty paragraphs with equivalent prefixes collapse together.
  37
  38     - Non-empty paragraphs with equivalent prefixes collapse together.
  39
  40     - If two adjacent, non-empty paragraphs A, B have non-equivalent
  41       prefixes, but the prefix of A is a prefix of the prefix of B, then
  42       we probably made an unfortunate mistake: they're just using an
  43       exotic character in the middle of a paragraph, and by bad luck
  44       that character starts the line. So transfer the suffix of B's
  45       prefix to B's content and collapse A and B together.
  46
  47     - If two adjacent, non-empty paragraphs A, B have non-equivalent
  48       prefixes, A is preceded by an empty paragraph (or nothing), B is
  49       succeeded by an empty paragraph (or nothing), AND A itself was not
  50       collapsed, then merge A and B, with A governing the
  51       first_line_prefix and B the gen_line_prefix.
  52
  53    Now output. That's easy, you made Ori take your stupid Unicode tables
  54    so you know what the cell width is.
  55  */
  56
  57 /* Turn input slop into paragraphs */
  58 const ingest = { str : byte[:]
  59         var s : state = `Reading_prefix
  60         var p : paragraph[:] = [][:]
  61         var p_cur : paragraph = [
  62                 .first_line_prefix = [][:],
  63                 .gen_line_prefix = [][:],
  64                 .content = [][:],
  65                 .empty = false,
  66                 .merged = false,
  67         ]
  68         for c : std.bychar(str)
  69                 if c == ('\r' : char)
  70                         continue /* dorks */
  71                 ;;
  72                 if c == ('\n' : char)
  73                         std.slpush(&p, p_cur)
  74                         p_cur = [
  75                                 .first_line_prefix = [][:],
  76                                 .gen_line_prefix = [][:],
  77                                 .content = [][:],
  78                                 .empty = false,
  79                                 .merged = false,
  80                         ]
  81                         s = `Reading_prefix
  82                         continue
  83                 ;;
  84
  85                 match s
  86                 | `Reading_prefix:
  87                         if is_textual_content(c)
  88                                 s = `Reading_line
  89                                 std.slpush(&p_cur.content, c)
  90                         else
  91                                 std.slpush(&p_cur.first_line_prefix, c)
  92                         ;;
  93                 | `Reading_line:
  94                         std.slpush(&p_cur.content, c)
  95                         if std.isblank(c)
  96                                 s = `Reading_line_last_was_ws
  97                         ;;
  98                 | `Reading_line_last_was_ws:
  99                         if !std.isblank(c)
 100                                 std.slpush(&p_cur.content, c)
 101                                 s = `Reading_line
 102                         ;;
 103                 ;;
 104         ;;
 105
 106         if p_cur.first_line_prefix.len > 0 || p_cur.content.len > 0
 107                 std.slpush(&p, p_cur)
 108         ;;
 109
 110         -> p
 111 }
 112
 113 /*
 114    I don't typically denote lists with ", ', or (. They should really be
 115    considered alphanumeric. TODO: exotic unicode should go here as well.
 116  */
 117 const is_textual_content = { c : char
 118         if std.isalpha(c)
 119                 -> true
 120         ;;
 121
 122         match c
 123         | '$':  -> true
 124         | '"':  -> true
 125         | '(':  -> true
 126         | ')':  -> true
 127         | '[':  -> true
 128         | '\'': -> true
 129         | '\\': -> true
 130         | ']':  -> true
 131         | '_':  -> true
 132         | '`':  -> true
 133         | _:    -> false
 134         ;;
 135 }
 136
 137 /* Do the paragraph joining thing */
 138 const digest = {p
 139         /* Mark as empty */
 140         for var j = 0; j < p.len; ++j
 141                 p[j].empty = (p[j].content.len == 0)
 142         ;;
 143
 144         /* Easy merges: first two cases in "the algorithm" */
 145         for var j = 0; j + 1 < p.len; ++j
 146                 if p[j].empty == p[j + 1].empty && equiv_prefixes(p[j].first_line_prefix, p[j + 1].first_line_prefix)
 147                         if !p[j].merged
 148                                 p[j].gen_line_prefix = std.sldup(p[j + 1].first_line_prefix)
 149                         ;;
 150                         merge_para(&p, j, j + 1)
 151                         j--
 152                 ;;
 153         ;;
 154
 155         /* Hard merges */
 156
 157         /* third case of "the algorithm" */
 158         for var j = 0; j + 1 < p.len; ++j
 159                 if !p[j].empty && !p[j + 1].empty && is_prefix(p[j].first_line_prefix, p[j + 1].first_line_prefix)
 160                         var l = p[j].first_line_prefix.len
 161                         var new_content = std.sldup(p[j + 1].first_line_prefix[l:])
 162                         std.sljoin(&new_content, p[j + 1].content)
 163                         clean(p[j + 1])
 164                         p[j + 1] = [
 165                                 .first_line_prefix = [][:],
 166                                 .gen_line_prefix = [][:],
 167                                 .content = new_content,
 168                         ]
 169                         merge_para(&p, j, j + 1)
 170                         j--
 171                 ;;
 172         ;;
 173
 174         /* fourth case of "the algorithm" */
 175         for var j = 0; j + 1 < p.len; ++j
 176                 if j > 0 && !p[j - 1].empty
 177                         continue
 178                 ;;
 179
 180                 if j + 2 < p.len && !p[j + 2].empty
 181                         continue
 182                 ;;
 183
 184                 if p[j].empty || p[j + 1].empty || p[j].merged
 185                         continue
 186                 ;;
 187
 188                 p[j].gen_line_prefix = std.sldup(p[j + 1].first_line_prefix)
 189                 merge_para(&p, j, j + 1)
 190         ;;
 191
 192         /* The unmerged give no distinction to the first */
 193         for var j = 0; j < p.len; ++j
 194                 if !p[j].merged
 195                         p[j].gen_line_prefix = std.sldup(p[j].first_line_prefix)
 196                 ;;
 197         ;;
 198
 199         /* Finally, strip whitespace from the end of content */
 200         for var j = 0; j < p.len; ++j
 201                 var c = &p[j].content
 202                 while c#.len > 0 && std.isblank(c#[c#.len - 1])
 203                         std.sldel(c, c#.len - 1)
 204                 ;;
 205         ;;
 206
 207         -> p
 208 }
 209
 210 const regurgitate = {p, max
 211         var sb : std.strbuf# = std.mksb()
 212         for a : p
 213                 var cur_pos = 0
 214                 if a.empty
 215                         /* maybe we can get away with dropping the prefix? */
 216                         var need_prefix = false
 217                         for c : a.first_line_prefix
 218                                 if !std.isblank(c)
 219                                         need_prefix = true
 220                                         break
 221                                 ;;
 222                         ;;
 223                         if !need_prefix
 224                                 std.sbputc(sb, '\n')
 225                                 continue
 226                         ;;
 227
 228                         /* Oh well, just handle it normally */
 229                 ;;
 230
 231                 /* initial prefix */
 232                 for c : a.first_line_prefix
 233                         std.sbputc(sb, c)
 234                         cur_pos += std.cellwidth(c)
 235                 ;;
 236
 237                 /* precalculate this */
 238                 var gen_prefix_len = 0
 239                 for c : a.gen_line_prefix
 240                         gen_prefix_len += std.cellwidth(c)
 241                 ;;
 242
 243                 var st, sn, e, wt, wn
 244                 var j = 0
 245                 while j < a.content.len
 246                         (st, sn, e, wt, wn) = hypothetical_forward(a.content, j)
 247                         if cur_pos + wt > max && gen_prefix_len + wn <= max
 248                                 std.sbputc(sb, '\n')
 249                                 for c : a.gen_line_prefix
 250                                         std.sbputc(sb, c)
 251                                 ;;
 252                                 for var k = sn; k < e; ++k
 253                                         std.sbputc(sb, a.content[k])
 254                                 ;;
 255                                 cur_pos = gen_prefix_len + wn
 256                         else
 257                                 for var k = st; k < e; ++k
 258                                         std.sbputc(sb, a.content[k])
 259                                 ;;
 260                                 cur_pos+= wt
 261                         ;;
 262
 263                         j = e
 264                 ;;
 265
 266                 std.sbputc(sb, ('\n' : char))
 267         ;;
 268
 269         std.writeall(1, std.sbfin(sb))
 270 }
 271
 272 const equiv_prefixes = {a, b
 273         var ak = 0
 274         var bk = 0
 275         while true
 276                 while ak < a.len && std.isblank(a[ak])
 277                         ak++
 278                 ;;
 279
 280                 while bk < b.len && std.isblank(b[bk])
 281                         bk++
 282                 ;;
 283
 284                 if (ak < a.len) != (bk < b.len)
 285                         -> false
 286                 elif ak < a.len
 287                         if a[ak] != b[bk]
 288                                 -> false
 289                         ;;
 290                 else
 291                         break
 292                 ;;
 293
 294                 ak++
 295                 bk++
 296         ;;
 297
 298         -> true
 299 }
 300
 301 const is_prefix = {pre, s
 302         if pre.len > s.len
 303                 -> false
 304         ;;
 305
 306         for var j = 0; j < pre.len; ++j
 307                 if pre[j] != s[j]
 308                         -> false
 309                 ;;
 310         ;;
 311
 312         -> true
 313 }
 314
 315
 316 const clean = {p
 317         std.slfree(p.first_line_prefix)
 318         std.slfree(p.gen_line_prefix)
 319         std.slfree(p.content)
 320 }
 321
 322 const merge_para = {p, j, k
 323         if (p#[j].content.len > 0 && !std.isblank(p#[j].content[p#[j].content.len - 1]))
 324                 /* TODO: what if you use U+3000 instead of ' '? Huh? */
 325                 std.slpush(&(p#[j].content), (' ' : char))
 326         ;;
 327         std.sljoin(&(p#[j].content), p#[k].content)
 328         clean(p#[k])
 329         std.sldel(p, k)
 330         p#[j].merged = true
 331 }
 332
 333 const hypothetical_forward = {c, j
 334         var start_if_this_line = j
 335         var start_if_next_line = j
 336         var end = j
 337         var width_if_this_line = 0
 338         var width_if_next_line = 0
 339         var past_first_blanks = false
 340
 341         while end < c.len
 342                 /*
 343                    By the normalization in ingest() we should only have
 344                    one blank separating non-blanks. Still, let's be damn
 345                    sure.
 346                  */
 347                 if !past_first_blanks
 348                         if!std.isblank(c[end])
 349                                 past_first_blanks = true
 350                                 start_if_next_line = end
 351                         else
 352                                 width_if_this_line += std.cellwidth(c[end])
 353                         ;;
 354                 ;;
 355
 356                 if past_first_blanks
 357                         if std.isblank(c[end])
 358                                 break
 359                         ;;
 360                         width_if_this_line += std.cellwidth(c[end])
 361                         width_if_next_line += std.cellwidth(c[end])
 362                 ;;
 363
 364                 end++
 365         ;;
 366
 367         -> (start_if_this_line, start_if_next_line, end, width_if_this_line, width_if_next_line)
 368 }