encoding: avoid doing any work if we don't have to
[iv.d.git] / strex.d
blob98f8b1294bf721d1ef6891e40c8f6b99e9e9eb18
1 /* Invisible Vector Library
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 // some string operations: quoting, `indexOf()` for non-utf8
18 module iv.strex /*is aliced*/;
/// Quote a string: wrap it in double quotes and escape all special chars,
/// so the result forms a valid D string literal.
/// Allocates the returned string.
string quote (const(char)[] s) {
  import std.format : formatElement, FormatSpec;
  import std.array : appender;
  FormatSpec!char spec; // default-constructed spec means 's'
  auto sink = appender!string();
  // `formatElement` is what Phobos uses to print array elements, i.e. quoted and escaped
  formatElement(sink, s, spec);
  return sink.data;
}
/// Convert integral number to text with a separator between 3-digit groups.
///
/// Params:
///   dest = output buffer; if it is `null`, a new buffer of the exact size is allocated.
///          if `dest` is too short, only the leading `dest.length` chars of the text are stored.
///   nn = number to convert
///   comma = group separator char
///
/// Returns:
///   the used part of `dest` (or the newly allocated buffer).
char[] intWithCommas(T) (char[] dest, T nn, char comma=',') if (__traits(isIntegral, T)) {
  static if (__traits(isUnsigned, T)) {
    enum neg = false;
    ulong n = nn;
  } else {
    immutable bool neg = (nn < 0);
    // take the magnitude in unsigned 64-bit arithmetic: sign-extend first, then negate
    // with wraparound. this is exact even for `T.min`, which has no positive counterpart in `T`.
    ulong n = cast(ulong)cast(long)nn;
    if (neg) n = 0UL-n;
  }
  char[256] buf = void;
  size_t bpos = buf.length; // digits are generated right-to-left
  int leftToComma = 3;
  do {
    // a separator goes in only when there is one more digit to emit after a full group
    if (leftToComma-- == 0) { buf[--bpos] = comma; leftToComma = 2; }
    buf[--bpos] = cast(char)('0'+n%10);
  } while ((n /= 10) != 0);
  if (neg) buf[--bpos] = '-';
  auto len = buf.length-bpos;
  if (dest is null) dest = new char[](len);
  if (len > dest.length) len = dest.length; // truncate to what fits
  dest[0..len] = buf[bpos..bpos+len];
  return dest[0..len];
}

/// Ditto, but always allocates the result.
char[] intWithCommas(T) (T nn, char comma=',') if (__traits(isIntegral, T)) { return intWithCommas(null, nn, comma); }
/// ASCII-only: convert 'A'..'Z' to lower case, return any other char unchanged.
char tolower (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return (ch < 'A' || ch > 'Z' ? ch : cast(char)(ch-'A'+'a'));
}

/// ASCII-only: convert 'a'..'z' to upper case, return any other char unchanged.
char toupper (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return (ch < 'a' || ch > 'z' ? ch : cast(char)(ch-'a'+'A'));
}

/// ASCII-only: is `ch` in 'a'..'z'?
bool islower (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return !(ch < 'a' || ch > 'z');
}

/// ASCII-only: is `ch` in 'A'..'Z'?
bool isupper (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return !(ch < 'A' || ch > 'Z');
}

/// ASCII-only: is `ch` a latin letter?
bool isalpha (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
}

/// Is `ch` a decimal digit?
bool isdigit (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return !(ch < '0' || ch > '9');
}

/// ASCII-only: is `ch` a latin letter or a decimal digit?
bool isalnum (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
}

/// Is `ch` a hexadecimal digit (either case)?
bool isxdigit (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'));
}
/// Convert a digit char to its numeric value in the given base.
///
/// Decimal digits map to 0..9, latin letters (either case) map to 10..35.
///
/// Params:
///   ch = char to convert
///   base = numeric base; letters are accepted only when `base` is greater than 10
///
/// Returns:
///   digit value, or -1 if `ch` is not a valid digit for `base`.
int digitInBase (char ch, int base=10) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return
    ch >= '0' && ch <= '9' && ch-'0' < base ? ch-'0' :
    // letter ranges are inclusive, so 'Z'/'z' is a valid digit (35) in base 36
    base > 10 && ch >= 'A' && ch <= 'Z' && ch-'A'+10 < base ? ch-'A'+10 :
    base > 10 && ch >= 'a' && ch <= 'z' && ch-'a'+10 < base ? ch-'a'+10 :
    -1;
}
alias atof = atofd!float; /// very simple atof/atod converter. accepts exponents. returns NaN on error.
alias atod = atofd!double; /// very simple atof/atod converter. accepts exponents. returns NaN on error.

/// Very simple atof/atod converter. Accepts exponents.
///
/// Accepted form: optional sign, decimal digits, optional '.' with more digits,
/// optional 'e'/'E' with optional sign and decimal exponent digits.
/// At least one digit before or after the dot is required.
/// Parsing stops at the first char that doesn't fit; anything after it is ignored.
///
/// Returns:
///   parsed value, or NaN if the string is empty or doesn't start with a number.
T atofd(T) (const(char)[] str) pure nothrow @trusted @nogc if (is(T == float) || is(T == double)) {
  if (str.length == 0) return T.nan; // oops

  // fraction digits beyond this can't change a double anyway, and 10^^18 still fits in `long`
  enum MaxFracDigits = 18;
  // exponents past this always give infinity or zero; the cap keeps the scaling loop
  // short and `expPart` far from `int` overflow
  enum ExpClamp = 10000;

  size_t pos = 0;
  double res = 0.0, sign = 1.0;
  bool hasIntPart = false, hasFracPart = false;

  // optional sign
  if (str[pos] == '-') { sign = -1.0; ++pos; }
  else if (str[pos] == '+') ++pos;

  // integer part
  while (pos < str.length && str[pos] >= '0' && str[pos] <= '9') {
    hasIntPart = true;
    res = res*10.0+(str[pos++]-'0');
  }

  // fractional part
  if (pos < str.length && str[pos] == '.') {
    ++pos; // skip '.'
    long num = 0, divisor = 1;
    int fracDigits = 0;
    while (pos < str.length && str[pos] >= '0' && str[pos] <= '9') {
      hasFracPart = true;
      immutable int digit = str[pos++]-'0';
      // consume all digits, but accumulate only as many as fit without overflow
      if (fracDigits < MaxFracDigits) {
        num = num*10+digit;
        divisor *= 10;
        ++fracDigits;
      }
    }
    if (hasFracPart) res += cast(double)num/cast(double)divisor;
  }

  // valid number should have integer or fractional part
  if (!hasIntPart && !hasFracPart) return T.nan;

  // optional exponent
  if (pos < str.length && (str[pos] == 'e' || str[pos] == 'E')) {
    ++pos; // skip 'E'
    // optional sign
    bool epositive = true;
    if (pos < str.length) {
      if (str[pos] == '-') { epositive = false; ++pos; }
      else if (str[pos] == '+') ++pos;
    }
    // missing exponent digits mean "exponent is zero"
    int expPart = 0;
    while (pos < str.length && str[pos] >= '0' && str[pos] <= '9') {
      immutable int digit = str[pos++]-'0';
      if (expPart < ExpClamp) expPart = expPart*10+digit;
    }
    if (epositive) {
      foreach (immutable _; 0..expPart) res *= 10.0;
    } else {
      foreach (immutable _; 0..expPart) res /= 10.0;
    }
  }

  return cast(T)(res*sign);
}
// ascii only
/// Case-insensitive equality check; only ASCII letters are case-folded.
bool strEquCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
  if (s0.length != s1.length) return false;
  foreach (immutable pos; 0..s0.length) {
    // lengths are equal, so unchecked access is fine at run time; CTFE uses normal indexing
    immutable char right = (__ctfe ? s1[pos] : s1.ptr[pos]);
    char left = (__ctfe ? s0[pos] : s0.ptr[pos]);
    if (left == right) continue; // the easiest case
    left |= 0x20; // fold to ascii lowercase
    // if this is not a letter, then a byte mismatch is a real mismatch
    if (left < 'a' || left > 'z') return false;
    // `left` is a lowercase letter here; `right` can match only if it is the same letter in any case
    if (left != (right|0x20)) return false;
  }
  return true;
}
version(test_strex) unittest {
  // letter case must not matter, whichever argument carries the capital
  assert(strEquCI("Alice", "alice"));
  assert(strEquCI("alice", "Alice"));
  // identical strings are equal too
  assert(strEquCI("alice", "alice"));
}
// ascii only
/// Case-insensitive three-way comparison; only ASCII letters are case-folded.
/// Returns -1, 0 or 1. When one string is a prefix of the other, the shorter one is "less".
int strCmpCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
  immutable size_t common = (s0.length > s1.length ? s1.length : s0.length);
  foreach (immutable pos; 0..common) {
    // `pos` is below both lengths, so unchecked access is fine at run time
    immutable char left = tolower(__ctfe ? s0[pos] : s0.ptr[pos]);
    immutable char right = tolower(__ctfe ? s1[pos] : s1.ptr[pos]);
    if (left != right) return (left < right ? -1 : 1);
  }
  // common part is the same, so the lengths decide
  if (s0.length == s1.length) return 0;
  return (s0.length < s1.length ? -1 : +1);
}
/// Remove leading and trailing chars with codes <= ' ' (blanks and control chars).
/// Returns a slice of `s`; never allocates.
inout(char)[] xstrip (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t lo = 0, hi = s.length;
  // the left side goes first, so an all-blank input becomes an empty slice at its end
  while (lo < hi && s[lo] <= ' ') ++lo;
  while (hi > lo && s[hi-1] <= ' ') --hi;
  return s[lo..hi];
}
/// Remove leading chars with codes <= ' ' (blanks and control chars).
/// Returns a slice of `s`; never allocates.
inout(char)[] xstripleft (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t skip = 0;
  while (skip < s.length && s[skip] <= ' ') ++skip;
  return s[skip..$];
}
/// Remove trailing chars with codes <= ' ' (blanks and control chars).
/// Returns a slice of `s`; never allocates.
inout(char)[] xstripright (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t keep = s.length;
  while (keep > 0 && s[keep-1] <= ' ') --keep;
  return s[0..keep];
}
/// Does `str` begin with `pat`? Empty `pat` always matches.
bool startsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  return (pat.length <= str.length && str[0..pat.length] == pat);
}
/// Does `str` end with `pat`? Empty `pat` always matches.
bool endsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  return (pat.length <= str.length && str[$-pat.length..$] == pat);
}
// ascii only
/// Does `str` begin with `pat`, ignoring the case of ASCII letters? Empty `pat` always matches.
bool startsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  return (pat.length <= str.length && strEquCI(str[0..pat.length], pat));
}
// ascii only
/// Does `str` end with `pat`, ignoring the case of ASCII letters? Empty `pat` always matches.
bool endsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  return (pat.length <= str.length && strEquCI(str[$-pat.length..$], pat));
}
/// Find the first occurrence of `need` in `hay`, starting the search at `stIdx`.
/// Works on raw bytes (no UTF decoding).
///
/// Returns:
///   index of the match counted from the start of `hay`, or -1 if there is none.
///   empty `need` is never found.
ptrdiff_t indexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (need.length == 0 || stIdx >= hay.length) return -1;
  if (need.length > hay.length-stIdx) return -1; // won't fit
  // single char: let the char overload do the job
  if (need.length == 1) return indexOf(hay, (__ctfe ? need[0] : need.ptr[0]), stIdx);
  if (__ctfe) {
    // no libc calls at compile time, do a naive scan
    foreach (immutable pos; stIdx..hay.length-need.length+1) {
      if (hay[pos..pos+need.length] == need) return pos;
    }
    return -1;
  }
  auto hit = cast(const(char)*)memmem(hay.ptr+stIdx, hay.length-stIdx, need.ptr, need.length);
  return (hit is null ? -1 : cast(ptrdiff_t)(hit-hay.ptr));
}
/// Find the first occurrence of `ch` in `hay`, starting the search at `stIdx`.
/// Works on raw bytes (no UTF decoding).
///
/// Returns:
///   index of the match counted from the start of `hay`, or -1 if there is none.
ptrdiff_t indexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (stIdx >= hay.length) return -1;
  if (__ctfe) {
    // no libc calls at compile time, do a naive scan
    for (size_t pos = stIdx; pos < hay.length; ++pos) {
      if (hay[pos] == ch) return pos;
    }
    return -1;
  }
  import core.stdc.string : memchr;
  auto hit = cast(const(char)*)memchr(hay.ptr+stIdx, ch, hay.length-stIdx);
  return (hit is null ? -1 : cast(ptrdiff_t)(hit-hay.ptr));
}
/// Find the last occurrence of `need` in `hay`, looking only at the part of `hay`
/// that starts at `stIdx`. Works on raw bytes (no UTF decoding).
///
/// Returns:
///   index of the match counted from the start of `hay`, or -1 if there is none.
///   empty `need` is never found.
ptrdiff_t lastIndexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (hay.length <= stIdx || need.length == 0 || need.length > hay.length-stIdx) {
    return -1;
  } else {
    // the fast path is for single-char needles (same as in `indexOf()`), not single-char haystacks
    if (need.length == 1) {
      if (__ctfe) {
        return lastIndexOf(hay, need[0], stIdx);
      } else {
        return lastIndexOf(hay, need.ptr[0], stIdx);
      }
    } else {
      if (__ctfe) {
        // no libc calls at compile time, do a naive backward scan
        foreach_reverse (immutable idx; stIdx..hay.length-need.length+1) {
          if (hay[idx..idx+need.length] == need) return idx;
        }
        return -1;
      } else {
        // keep the result const: it points into `hay`
        auto res = cast(const(char)*)memrmem(hay.ptr+stIdx, hay.length-stIdx, need.ptr, need.length);
        return (res !is null ? cast(ptrdiff_t)(res-hay.ptr) : -1);
      }
    }
  }
}
/// Find the last occurrence of `ch` in `hay`, looking only at the part of `hay`
/// that starts at `stIdx`. Works on raw bytes (no UTF decoding).
///
/// Returns:
///   index of the match counted from the start of `hay`, or -1 if there is none.
ptrdiff_t lastIndexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (stIdx >= hay.length) return -1;
  if (__ctfe) {
    // no libc calls at compile time, do a naive backward scan
    foreach_reverse (immutable pos; stIdx..hay.length) {
      if (hay[pos] == ch) return pos;
    }
    return -1;
  }
  auto hit = cast(const(char)*)memrchr(hay.ptr+stIdx, ch, hay.length-stIdx);
  return (hit is null ? -1 : cast(ptrdiff_t)(hit-hay.ptr));
}
version(test_strex) unittest {
  // substring search: found, not found, needle longer than haystack
  assert(indexOf("Alice & Miriel", " & ") == 5);
  assert(indexOf("Alice & Miriel", " &!") == -1);
  assert(indexOf("Alice & Miriel", "Alice & Miriel was here!") == -1);

  // char search, with a literal and with a variable
  assert(indexOf("Alice & Miriel", '&') == 6);
  char ch = ' ';
  assert(indexOf("Alice & Miriel", ch) == 5);

  // one-char needle, with and without start index
  assert(indexOf("Alice & Miriel", "i") == 2);
  assert(indexOf("Alice & Miriel", "i", 6) == 9);
  assert(indexOf("Alice & Miriel", "i", 12) == -1);

  // start index right at the match, and just past it
  assert(indexOf("Alice & Miriel", "Miriel", 8) == 8);
  assert(indexOf("Alice & Miriel", "Miriel", 9) == -1);

  // backward search
  assert(lastIndexOf("Alice & Miriel", "i") == 11);
  assert(lastIndexOf("Alice & Miriel", "i", 6) == 11);
  assert(lastIndexOf("Alice & Miriel", "i", 11) == 11);
  assert(lastIndexOf("Alice & Miriel", "i", 12) == -1);

  // overlapping matches: the last one wins
  assert(lastIndexOf("iiii", "ii") == 2);
}
/// Replace each tab with spaces up to the next tab stop (tab stops are `tabSize` columns apart).
/// The column counter restarts after every '\n' and '\r'; every other char is one column wide.
/// Allocates the returned string.
string detab (const(char)[] s, uint tabSize=8) {
  assert(tabSize > 0);

  import std.array : appender;
  auto sink = appender!string();
  uint column = 0;

  foreach (immutable char ch; s) {
    if (ch == '\t') {
      // a tab always gives at least one space
      immutable uint pad = tabSize-column%tabSize;
      foreach (immutable _; 0..pad) sink.put(' ');
      column += pad;
      continue;
    }
    if (ch == '\n' || ch == '\r') column = 0; else ++column;
    sink.put(ch);
  }

  return sink.data;
}
// Checks detab() against expected literals: once with tab size 9, once with the default tab size.
// NOTE(review): runs of spaces inside these string literals may have been collapsed in this copy
// of the file -- confirm against the repository before relying on the expected values.
396 version(test_strex) unittest {
397 assert(detab(" \n\tx", 9) == " \n x");
398 assert(detab(" ab\t asdf ") == " ab asdf ");
/// Split text into lines, lazily. Returns a forward range of slices of `s`.
///
/// "\n", "\r" and "\r\n" are all line terminators; they are not included in the returned lines.
/// A terminator at the very end of the text doesn't produce an extra empty line.
auto byLine(T) (T s) if (is(T:const(char)[])) {
  static struct Range(T) {
  nothrow @safe @nogc:
  private:
    T s; // unprocessed text; current line is `s[0..llen]`
    size_t llen, npos; // length of the current line; where the next line starts in `s`
    this (T as) { s = as; popFront(); }
  public:
    @property bool empty () const { pragma(inline, true); return (s.length == 0); }
    @property T front () const { pragma(inline, true); return cast(T)s[0..llen]; } // fuckin' const!
    auto save () const @trusted { Range!T res = void; res.s = s; res.llen = llen; res.npos = npos; return res; }
    void popFront () @trusted {
      // drop the current line with its terminator
      s = s[npos..$];
      // look for the end of the next line
      size_t pos = 0;
      while (pos < s.length && s.ptr[pos] != '\r' && s.ptr[pos] != '\n') ++pos;
      llen = pos;
      if (pos < s.length) {
        // stopped at a terminator; "\r\n" counts as one
        if (s.ptr[pos] == '\r' && s.length-pos > 1 && s.ptr[pos+1] == '\n') ++pos;
        ++pos;
      }
      npos = pos;
    }
  }
  return Range!T(s);
}
// Smoke test for byLine(): prints every line of two token-string inputs, each wrapped as "LN: [...]".
// There are no assertions here; the output is meant to be inspected by eye.
// NOTE(review): whitespace inside the token strings is significant and may be damaged in this copy
// of the file -- confirm against the repository.
437 version(test_strex) unittest {
438 enum s = q{
439 import std.stdio;
440 void main() {
441 writeln("Hello");
444 enum ugly = q{
445 import std.stdio;
446 void main() {
447 writeln("Hello");
451 foreach (/+auto+/ line; s.byLine) {
452 import std.stdio;
453 writeln("LN: [", line, "]");
456 foreach (/+auto+/ line; ugly.byLine) {
457 import std.stdio;
458 writeln("LN: [", line, "]");
// string should be detabbed!
/// Remove the common indentation from all lines of the text.
///
/// The common indentation is the smallest number of leading chars with codes <= ' '
/// among the lines that have something else in them. Blank lines become empty lines.
/// Each output line is terminated with '\n', whatever the original terminator was.
/// A tab in the leading blanks of any line is a fatal error (`assert(0)`).
/// Allocates the returned string.
string outdentAll (const(char)[] s) {
  import std.array : appender;

  // pass 1: find the smallest indent among non-blank lines
  uint minIndent = uint.max;
  foreach (line; s.byLine) {
    uint lead = 0;
    for (; lead < line.length && line.ptr[lead] <= ' '; ++lead) {
      if (line.ptr[lead] == '\t') assert(0, "can't outdent shit with tabs");
    }
    if (lead >= line.length) continue; // blank line, it has no say
    if (lead < minIndent) minIndent = lead;
    if (lead == 0) break; // can't get any smaller
  }

  // pass 2: cut that indent from every non-blank line
  auto sink = appender!string();
  foreach (line; s.byLine) {
    bool blank = true;
    foreach (immutable char ch; line) {
      if (ch > ' ') { blank = false; break; }
    }
    if (!blank) sink.put(line[minIndent..$]);
    sink.put('\n');
  }

  return sink.data;
}
// Checks that outdentAll() applied to the first token string gives exactly the second token string.
// NOTE(review): the comparison depends on the exact whitespace inside both token strings, which may
// be damaged in this copy of the file -- confirm against the repository.
494 version(test_strex) unittest {
495 enum pretty = q{
496 import std.stdio;
497 void main() {
498 writeln("Hello");
500 }.outdentAll;
502 enum ugly = q{
503 import std.stdio;
504 void main() {
505 writeln("Hello");
510 import std.stdio;
511 assert(pretty == ugly);
515 //From: Yahoo Groups <confirm-s2-2ny0qbq23nljzefbilh5vpjrg1pik5hf-ketmar=ketmar.no-ip.org@yahoogroups.com>
/// Can `ch` be used in the part of an e-mail address before the '@'?
/// This is a deliberately permissive check: see the accepted punctuation below.
private bool isValidEmailNameChar (char ch) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (ch <= 32) return false; // control chars and space
  immutable bool alnum =
    (ch >= '0' && ch <= '9') ||
    (ch >= 'A' && ch <= 'Z') ||
    (ch >= 'a' && ch <= 'z');
  immutable bool usualPunct = (ch == '_' || ch == '+' || ch == '-' || ch == '=' || ch == '.' || ch == '$');
  // why not?
  immutable bool oddPunct = (ch == '!' || ch == '%' || ch == '^' || ch == '&' || ch == '(' || ch == ')' || ch == '?');
  // bytes >= 128 (non-ascii) are accepted too; why not?
  return (alnum || usualPunct || oddPunct || ch >= 128);
}
/// Can `ch` be used in the part of an e-mail address after the '@'?
/// Only ASCII letters, decimal digits, '-' and '.' are accepted.
private bool isValidEmailHostChar (char ch) pure nothrow @safe @nogc {
  pragma(inline, true);
  immutable bool alnum =
    (ch >= '0' && ch <= '9') ||
    (ch >= 'A' && ch <= 'Z') ||
    (ch >= 'a' && ch <= 'z');
  return (alnum || ch == '-' || ch == '.');
}
/// Does `s` look like an e-mail address?
///
/// The check is: non-empty name of valid name chars, then '@', then non-empty host of
/// valid host chars. This makes the first '@' the separator, as '@' is not a valid host char.
bool isGoodEmail (const(char)[] s) pure nothrow @trusted @nogc {
  if (s.length == 0 || s.ptr[0] == '@') return false;
  // name part: everything up to the first '@'
  size_t pos = 0;
  while (pos < s.length && s.ptr[pos] != '@') {
    if (!isValidEmailNameChar(s.ptr[pos])) return false;
    ++pos;
  }
  if (pos >= s.length) return false; // no doggy
  ++pos; // skip '@'
  if (pos >= s.length) return false; // no host
  // host part: the rest of the string
  foreach (immutable char ch; s[pos..$]) {
    if (!isValidEmailHostChar(ch)) return false;
  }
  return true;
}
564 pure nothrow @system @nogc:
// `memmem()` and `memrchr()` come from libc on linux; elsewhere plain D versions are used.
// Note that the D `memmem()` returns `null` for an empty needle.
version(linux) {
  extern(C) inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen);
  extern(C) inout(void)* memrchr (inout(void)* s, int ch, size_t slen);
} else {
  /// Find the first occurrence of `needle` in `haystack`; returns `null` if there is none.
  inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
    import core.stdc.string : memcmp;
    // size_t is unsigned, so check before subtracting
    if (needlelen == 0 || needlelen > haystacklen) return null;
    auto hay = cast(const(ubyte)*)haystack;
    auto pat = cast(const(ubyte)*)needle;
    immutable size_t lastStart = haystacklen-needlelen;
    for (size_t ofs = 0; ofs <= lastStart; ++ofs) {
      if (memcmp(hay+ofs, pat, needlelen) == 0) return cast(typeof(return))(hay+ofs);
    }
    return null;
  }

  /// Find the last byte equal to `ch` (its low 8 bits) in `haystack`; returns `null` if there is none.
  inout(void)* memrchr (inout(void)* haystack, int ch, size_t haystacklen) {
    // size_t is unsigned
    if (haystacklen == 0) return null;
    auto hay = cast(const(ubyte)*)haystack;
    ch &= 0xff;
    for (size_t left = haystacklen; left > 0; --left) {
      if (hay[left-1] == ch) return cast(typeof(return))(hay+(left-1));
    }
    return null;
  }
}
/// Find the last occurrence of `needle` in `haystack`.
///
/// Returns:
///   pointer to the start of the match, or `null` if there is none.
///   empty `needle` is never found.
inout(void)* memrmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
  import core.stdc.string : memcmp;
  // size_t is unsigned, so check before subtracting
  if (needlelen == 0 || needlelen > haystacklen) return null;
  auto h = cast(const(ubyte)*)haystack;
  auto n = cast(const(ubyte)*)needle;
  // a match can't start past this many bytes
  size_t len = haystacklen-needlelen+1;
  while (len > 0) {
    // find the rightmost candidate by the first needle byte, then check the whole needle
    auto ff = cast(const(ubyte)*)memrchr(h, *n, len);
    if (ff is null) break;
    if (memcmp(ff, n, needlelen) == 0) return cast(typeof(return))ff;
    // no luck; continue to the left of this candidate
    len = cast(size_t)(ff-h);
  }
  return null;
}