1 // CODYlib -*- mode:c++ -*-
2 // Copyright (C) 2020 Nathan Sidwell, nathan@acm.org
3 // License: Apache v2.0
17 // Lines consist of words and end with a NEWLINE (0xa) char
18 // Whitespace characters are TAB (0x9) and SPACE (0x20)
19 // Words consist of non-whitespace chars separated by whitespace.
20 // Multiple lines in one transaction are indicated by ending non-final
21 // lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE
22 // A continued line is one whose NEWLINE is immediately preceded by a ; word
23 // Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting.
25 // Anything outside of [-+_/%.a-zA-Z0-9] needs quoting
26 // Inside quotes, anything below <space>, or DEL or \' or \\ needs escaping.
27 // Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>?
28 // Spaces separate words, UTF8 encoding for non-ascii chars
// Continuation marker: the SEMICOLON character, converted through S2C.
// BeginLine appends it just before a NEWLINE; Read, Lex and LexedLine
// compare buffer characters against it to recognise continued lines.
33 static const char CONTINUE
= S2C(u8
";");
// Start a new line in the message buffer.  The visible code terminates
// the previous line by appending SPACE, CONTINUE and NEWLINE (three
// chars, hence the reserve of size + 3), then records the current end
// of the buffer in lastBol as the beginning of the new line.
// NOTE(review): the condition guarding the terminator is not visible in
// this extract -- confirm against the full source.
35 void MessageBuffer::BeginLine ()
39 // Terminate the previous line with a continuation
40 buffer
.reserve (buffer
.size () + 3);
41 buffer
.push_back (S2C(u8
" "));
42 buffer
.push_back (CONTINUE
);
43 buffer
.push_back (S2C(u8
"\n"));
// The new line begins at the current end of the buffer.
45 lastBol
= buffer
.size ();
// Append the LEN chars at STR to the buffer as (part of) a word,
// wrapping it in single quotes and escaping where needed.
//   str   - characters to append
//   quote - 'maybe quote' request, see the comment below
//   len   - number of chars; compared against the all-ones size_t value
//           NOTE(review): presumably that sentinel means 'length not
//           supplied', its handling is not visible in this extract.
// NOTE(review): several statements of this function (braces, case
// labels, some conditions) are missing from this extract; the comments
// here describe only the visible code.
48 // QUOTE means 'maybe quote', we search it for quote-needing chars
50 void MessageBuffer::Append (char const *str
, bool quote
, size_t len
)
52 if (len
== ~size_t (0))
58 // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything
59 // that could remotely be shell-active. UTF8 encoding for non-ascii.
63 // Scan looking for quote-needing characters. We could just
64 // append until we find one, but that's probably confusing
// The scan walks backwards, from index len - 1 down to 0.
65 for (size_t ix
= len
; ix
--;)
67 unsigned char c
= (unsigned char)str
[ix
];
// True when C is outside the no-quoting set [-+_/%.a-zA-Z0-9].
68 if (!((c
>= S2C(u8
"a") && c
<= S2C(u8
"z"))
69 || (c
>= S2C(u8
"A") && c
<= S2C(u8
"Z"))
70 || (c
>= S2C(u8
"0") && c
<= S2C(u8
"9"))
71 || c
== S2C(u8
"-") || c
== S2C(u8
"+") || c
== S2C(u8
"_")
72 || c
== S2C(u8
"/") || c
== S2C(u8
"%") || c
== S2C(u8
".")))
80 // Maximal length of appended string
// When quoting, a char can expand to backslash plus two hex digits
// (3 chars); the extra 2 covers the opening and closing quotes.
81 buffer
.reserve (buffer
.size () + len
* (quote
? 3 : 1) + 2);
// Opening quote.
84 buffer
.push_back (S2C(u8
"'"));
// Copy STR in runs: each run of plain chars is inserted verbatim,
// followed by the escape sequence for the char that ended the run.
86 for (auto *end
= str
+ len
; str
!= end
;)
91 // Look for next escape-needing char. More relaxed than
92 // the earlier needs-quoting check.
93 for (e
= str
; e
!= end
; ++e
)
95 unsigned char c
= (unsigned char)*e
;
// Escape-needing: below SPACE, DEL, backslash or single quote.
96 if (c
< S2C(u8
" ") || c
== 0x7f
97 || c
== S2C(u8
"\\") || c
== S2C(u8
"'"))
// Emit the plain run [str, e) unchanged.
100 buffer
.insert (buffer
.end (), str
, e
);
// Every escape starts with a backslash.
106 buffer
.push_back (S2C(u8
"\\"));
107 switch (unsigned char c
= (unsigned char)*str
++)
120 buffer
.push_back (c
);
124 // Full-on escape. Use 2 lower-case hex chars
125 for (unsigned shift
= 8; shift
;)
129 char nibble
= (c
>> shift
) & 0xf;
// Map 0..9 to '0'..'9', then shift 10..15 up to 'a'..'f'.
130 nibble
+= S2C(u8
"0");
131 if (nibble
> S2C(u8
"9"))
132 nibble
+= S2C(u8
"a") - (S2C(u8
"9") + 1);
133 buffer
.push_back (nibble
);
// Closing quote.
139 buffer
.push_back (S2C(u8
"'"));
// Append the single character C to the buffer as-is; no quoting or
// escaping is applied by the visible code.
142 void MessageBuffer::Append (char c
)
144 buffer
.push_back (c
);
// Format the unsigned value U as decimal text ("%u") into a temporary
// 20-char std::string using snprintf.
// NOTE(review): the code that consumes V and LEN is not visible in this
// extract -- presumably the formatted digits are appended to the buffer.
147 void MessageBuffer::AppendInteger (unsigned u
)
149 // Sigh, even though std::to_string is C++11, we support building on
150 // gcc 4.8, which is a C++11 compiler lacking std::to_string. so
151 // have something horrible.
// V is pre-sized to 20 NUL chars and used as snprintf's output storage;
// the const_cast is needed to write through the pointer data () returns.
152 std::string
v (20, 0);
153 size_t len
= snprintf (const_cast<char *> (v
.data ()), v
.size (), "%u", u
);
// Write the buffer contents from offset lastBol to the end to file
// descriptor FD with a single write () call.
// NOTE(review): the error capture, the return statements and the reset
// code are not visible in this extract; the int result is presumably
// 0 or an errno value -- confirm against the full source.
159 int MessageBuffer::Write (int fd
) noexcept
// Number of bytes still pending, starting at lastBol.
161 size_t limit
= buffer
.size () - lastBol
;
162 ssize_t count
= write (fd
, &buffer
.data ()[lastBol
], limit
);
// Not everything was written: a short write, or a failure (count < 0
// converts to a huge size_t and so also differs from limit).
170 if (size_t (count
) != limit
)
// EAGAIN and EINTR are singled out from the other error codes.
174 if (err
!= EAGAIN
&& err
!= EINTR
)
176 // Reset for next message
// Read more bytes from file descriptor FD onto the end of the buffer and
// look for complete lines in what arrived.  The visible final return
// yields EAGAIN when MORE is set (the char before a NEWLINE is CONTINUE)
// and 0 otherwise.
// NOTE(review): buffer growth, the handling of read errors/EOF and the
// loop around the newline search are not visible in this extract.
184 int MessageBuffer::Read (int fd
) noexcept
186 constexpr size_t blockSize
= 200;
// lwm: bytes already held; hwm: current capacity of the buffer.
188 size_t lwm
= buffer
.size ();
189 size_t hwm
= buffer
.capacity ();
// Less than half a block of spare room remains.
190 if (hwm
- lwm
< blockSize
/ 2)
// Read into the space following the existing contents.
194 auto iter
= buffer
.begin () + lwm
;
195 ssize_t count
= read (fd
, &*iter
, hwm
- lwm
);
// Size the buffer to what is actually held; a negative count adds nothing.
196 buffer
.resize (lwm
+ (count
>= 0 ? count
: 0));
// Search the newly read region for a NEWLINE.
208 auto newline
= std::find (iter
, buffer
.end (), S2C(u8
"\n"));
209 if (newline
== buffer
.end ())
// The line is continued when the char just before the NEWLINE is CONTINUE.
211 more
= newline
!= buffer
.begin () && newline
[-1] == CONTINUE
;
214 if (iter
== buffer
.end ())
219 // There is no continuation, but there are chars after the
220 // newline. Truncate the buffer and return an error
221 buffer
.resize (iter
- buffer
.begin ());
226 return more
? EAGAIN
: 0;
// Lex the line starting at lastBol into words, appending each word as a
// std::string to RESULT, and advance lastBol past the consumed text.
// The buffer must end with a NEWLINE (asserted).
// NOTE(review): most control flow (braces, returns, the quote state and
// the escape switch) is missing from this extract; the comments describe
// only the visible tests.  The int result is presumably 0 or an errno
// value -- confirm against the full source.
229 int MessageBuffer::Lex (std::vector
<std::string
> &result
)
236 Assert (buffer
.back () == S2C(u8
"\n"));
238 auto iter
= buffer
.begin () + lastBol
;
// WORD points at the word currently being built, null between words.
240 for (std::string
*word
= nullptr;;)
// SPACE and TAB are the whitespace characters.
245 if (c
== S2C(u8
" ") || c
== S2C(u8
"\t"))
251 if (c
== S2C(u8
"\n"))
257 if (word
|| *iter
!= S2C(u8
"\n"))
// Control chars, SPACE and anything from DEL upwards.
263 if (c
<= S2C(u8
" ") || c
>= 0x7f)
// Begin a new, empty word at the back of RESULT.
268 result
.emplace_back ();
269 word
= &result
.back ();
279 if (c
== S2C(u8
"\n"))
// Skip forward to the NEWLINE that ends this line.
283 iter
= std::find (iter
, buffer
.end (), S2C(u8
"\n"));
285 if (back
[-1] == CONTINUE
&& back
[-2] == S2C(u8
" "))
286 // Smells like a line continuation
// Store the raw text from lastBol up to BACK as one RESULT entry.
288 result
.emplace_back (&buffer
[lastBol
],
289 back
- buffer
.begin () - lastBol
);
291 lastBol
= iter
- buffer
.begin ();
// Control chars and anything from DEL upwards.
295 if (c
< S2C(u8
" ") || c
>= 0x7f)
// Backslash introduces an escape sequence.
302 if (c
== S2C(u8
"\\"))
317 // We used to escape SPACE as \_, so accept that
// A hex escape carries at most two nibbles.
330 for (unsigned nibble
= 0; nibble
!= 2; nibble
++)
339 else if (c
<= S2C(u8
"9"))
341 else if (c
< S2C(u8
"a"))
// 'a'..'f' map to the values 10..15.
347 else if (c
<= S2C(u8
"f"))
348 c
-= S2C(u8
"a") - 10;
365 // Unquoted character
368 lastBol
= iter
- buffer
.begin ();
// Append to STR the buffer text between POS and END, where END is
// lastBol - 1 and a trailing SPACE CONTINUE pair is tested for so the
// continuation can be stripped.
// NOTE(review): the code that moves POS and adjusts END is not visible
// in this extract; presumably POS is walked back to the start of the
// line that precedes lastBol -- confirm against the full source.
375 void MessageBuffer::LexedLine (std::string
&str
)
379 size_t pos
= lastBol
- 1;
// A NEWLINE just before POS is tested for.
381 if (buffer
[pos
-1] == S2C(u8
"\n"))
384 size_t end
= lastBol
- 1;
385 if (buffer
[end
-1] == CONTINUE
&& buffer
[end
-2] == S2C(u8
" "))
386 // Strip line continuation
388 str
.append (&buffer
[pos
], end
- pos
);