sq3: do not explicitly init/shutdown sqlite library; added some missing constants
[iv.d.git] / strex.d
blob4f639f53886c010bc825ec1ae00c2f52c793defd
1 /* Invisible Vector Library
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 // some string operations: quoting, `indexOf()` for non-utf8
18 module iv.strex /*is aliced*/;
/// Quote a string: wrap it in double quotes and escape all special chars,
/// so the result forms a valid D string literal.
/// Allocates.
string quote (const(char)[] s) {
  import std.array : appender;
  import std.format : formatElement, FormatSpec;
  FormatSpec!char spec; // default-constructed spec means 's'
  auto sink = appender!string();
  // `formatElement` is what the formatter uses for array elements: it adds quotes and escapes
  formatElement(sink, s, spec);
  return sink.data;
}
/// Convert an integral number to decimal text with a separator between
/// each group of three digits.
///
/// Params:
///   dest = output buffer; if `null`, a buffer of the exact size is allocated.
///          if it is too small, only the first `dest.length` chars of the
///          text are stored.
///   nn = number to convert
///   comma = separator char
///
/// Returns: the used part of `dest`.
char[] intWithCommas(T) (char[] dest, T nn, char comma=',') if (__traits(isIntegral, T)) {
  // work with the unsigned magnitude, so `T.min` (which has no positive
  // counterpart in `T`) is converted correctly
  static if (__traits(isUnsigned, T)) {
    enum neg = false;
    ulong n = nn;
  } else {
    immutable bool neg = (nn < 0);
    // `-(nn+1)` cannot overflow; the missing 1 is added back in `ulong`
    ulong n = (neg ? cast(ulong)(-(cast(long)nn+1))+1 : cast(ulong)nn);
  }
  // the text is built right-to-left in a temporary buffer
  char[256] buf = void;
  size_t bpos = buf.length;
  int leftToComma = 3;
  do {
    if (leftToComma-- == 0) { buf[--bpos] = comma; leftToComma = 2; }
    buf[--bpos] = cast(char)('0'+n%10);
  } while ((n /= 10) != 0);
  if (neg) buf[--bpos] = '-';
  auto len = buf.length-bpos;
  if (dest is null) dest = new char[](len);
  if (len > dest.length) len = dest.length; // truncate
  dest[0..len] = buf[bpos..bpos+len];
  return dest[0..len];
}

/// Ditto, but the result is always stored in a newly allocated buffer.
char[] intWithCommas(T) (T nn, char comma=',') if (__traits(isIntegral, T)) { return intWithCommas(null, nn, comma); }
// ASCII-only character helpers; chars outside of the tested ranges are
// left unchanged by the converters and rejected by the predicates.

/// convert ASCII uppercase letter to lowercase
char tolower (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return cast(char)(ch >= 'A' && ch <= 'Z' ? ch+('a'-'A') : ch);
}

/// convert ASCII lowercase letter to uppercase
char toupper (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return cast(char)(ch >= 'a' && ch <= 'z' ? ch-('a'-'A') : ch);
}

/// is `ch` in 'a'..'z'?
bool islower (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return !(ch < 'a' || ch > 'z');
}

/// is `ch` in 'A'..'Z'?
bool isupper (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return !(ch < 'A' || ch > 'Z');
}

/// is `ch` an ASCII letter?
bool isalpha (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return (ch >= 'a' ? ch <= 'z' : (ch >= 'A' && ch <= 'Z'));
}

/// is `ch` in '0'..'9'?
bool isdigit (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return !(ch < '0' || ch > '9');
}

/// is `ch` an ASCII letter or a decimal digit?
bool isalnum (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return ((ch >= '0' && ch <= '9') || (ch >= 'a' ? ch <= 'z' : (ch >= 'A' && ch <= 'Z')));
}

/// is `ch` a hexadecimal digit (either case)?
bool isxdigit (char ch) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'));
}
/// Get the value of `ch` as a digit in the given base.
/// Digits above 9 are letters 'A'..'Z' (value 10..35), either case.
///
/// Returns: digit value, or -1 if `ch` is not a valid digit in `base`.
int digitInBase (char ch, int base=10) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return
    ch >= '0' && ch <= '9' && ch-'0' < base ? ch-'0' :
    // the letter ranges are inclusive: 'Z'/'z' is digit 35 (base 36)
    base > 10 && ch >= 'A' && ch <= 'Z' && ch-'A'+10 < base ? ch-'A'+10 :
    base > 10 && ch >= 'a' && ch <= 'z' && ch-'a'+10 < base ? ch-'a'+10 :
    -1;
}
90 alias atof = atofd!float; /// very simple atof/atod converter. accepts exponents. returns NaN on error.
91 alias atod = atofd!double; /// very simple atof/atod converter. accepts exponents. returns NaN on error.
/// Very simple atof/atod converter. Accepts exponents.
///
/// Parsing stops at the first char that cannot continue the number;
/// everything after it is ignored.
///
/// Returns: parsed value, or NaN if the string is empty or has neither
/// integer nor fractional digits.
T atofd(T) (const(char)[] str) pure nothrow @trusted @nogc if (is(T == float) || is(T == double)) {
  if (str.length == 0) return T.nan; // oops

  const(char)[] s = str;
  double res = 0.0, sign = 1.0;
  bool hasIntPart = false, hasFracPart = false;

  char peekChar () nothrow @trusted @nogc { pragma(inline, true); return (s.length ? s.ptr[0] : '\0'); }
  void skipChar () nothrow @trusted @nogc { pragma(inline, true); if (s.length > 0) s = s[1..$]; }
  // value of the current char as a decimal digit, or -1 if it is not a digit
  int peekDigit () nothrow @trusted @nogc { return (s.length && s.ptr[0] >= '0' && s.ptr[0] <= '9' ? s.ptr[0]-'0' : -1); }

  // optional sign
  switch (peekChar()) {
    case '-': sign = -1; goto case;
    case '+': skipChar(); break;
    default: break;
  }

  // integer part
  if (peekDigit() >= 0) {
    hasIntPart = true;
    for (int d = peekDigit(); d >= 0; d = peekDigit()) { res = res*10.0+d; skipChar(); }
  }

  // fractional part
  if (peekChar() == '.') {
    skipChar(); // skip '.'
    if (peekDigit() >= 0) {
      hasFracPart = true;
      // 10^^18 still fits in `long`; digits past that are far below
      // `double` precision, so they are consumed, but not accumulated
      enum MaxFracDigits = 18;
      long divisor = 1;
      long num = 0;
      int ndigits = 0;
      for (int d = peekDigit(); d >= 0; d = peekDigit()) {
        if (ndigits < MaxFracDigits) {
          divisor *= 10;
          num = num*10+d;
          ++ndigits;
        }
        skipChar();
      }
      res += cast(double)num/cast(double)divisor;
    }
  }

  // valid number should have integer or fractional part
  if (!hasIntPart && !hasFracPart) return T.nan;

  // optional exponent
  if (peekChar() == 'e' || peekChar() == 'E') {
    skipChar(); // skip 'E'
    // optional sign
    bool epositive = true;
    switch (peekChar()) {
      case '-': epositive = false; goto case;
      case '+': skipChar(); break;
      default: break;
    }
    // stop accumulating once the exponent is surely out of `double` range:
    // this avoids `int` overflow, and the result is infinity/zero anyway
    int expPart = 0;
    for (int d = peekDigit(); d >= 0; d = peekDigit()) {
      if (expPart < 10000) expPart = expPart*10+d;
      skipChar();
    }
    if (epositive) {
      foreach (immutable _; 0..expPart) res *= 10.0;
    } else {
      foreach (immutable _; 0..expPart) res /= 10.0;
    }
  }

  return cast(T)(res*sign);
}
/// Case-insensitive string equality check. ASCII only.
bool strEquCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
  immutable len = s0.length;
  if (len != s1.length) return false;
  if (__ctfe) {
    // use ordinary indexing in CTFE
    foreach (immutable idx; 0..len) {
      if (s0[idx].tolower != s1[idx].tolower) return false;
    }
  } else {
    // lengths are equal, so unchecked access is fine here
    foreach (immutable idx; 0..len) {
      if (s0.ptr[idx].tolower != s1.ptr[idx].tolower) return false;
    }
  }
  return true;
}
/// Case-insensitive lexicographic string comparison. ASCII only.
///
/// Chars of the common prefix are compared first (after lowercasing);
/// if they are all equal, the shorter string is the lesser one.
///
/// Returns: -1 if `s0 < s1`, 1 if `s0 > s1`, 0 if the strings are equal.
int strCmpCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
  auto slen = s0.length;
  if (slen > s1.length) slen = s1.length;
  foreach (immutable idx, char c0; s0[0..slen]) {
    c0 = c0.tolower;
    // use ordinary indexing in CTFE; `idx` is always less than `s1.length`
    immutable char c1 = (__ctfe ? s1[idx] : s1.ptr[idx]).tolower;
    if (c0 != c1) return (c0 < c1 ? -1 : 1);
  }
  if (s0.length == s1.length) return 0;
  return (s0.length < s1.length ? -1 : 1);
}
/// Remove leading and trailing chars with codes up to (and including) space.
inout(char)[] xstrip (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t start = 0;
  while (start < s.length && s.ptr[start] <= ' ') ++start;
  size_t end = s.length;
  while (end > start && s[end-1] <= ' ') --end;
  return s[start..end];
}


/// Remove leading chars with codes up to (and including) space.
inout(char)[] xstripleft (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t start = 0;
  while (start < s.length && s.ptr[start] <= ' ') ++start;
  return s[start..$];
}


/// Remove trailing chars with codes up to (and including) space.
inout(char)[] xstripright (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t end = s.length;
  while (end > 0 && s[end-1] <= ' ') --end;
  return s[0..end];
}
/// Does `str` start with `pat`? Bytewise comparison; empty `pat` always matches.
bool startsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  immutable plen = pat.length;
  if (str.length < plen) return false;
  return (str[0..plen] == pat);
}
/// Does `str` end with `pat`? Bytewise comparison; empty `pat` always matches.
bool endsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  immutable plen = pat.length;
  if (str.length < plen) return false;
  return (str[$-plen..$] == pat);
}
/// Does `str` start with `pat`, ignoring ASCII case? Empty `pat` always matches.
bool startsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  immutable plen = pat.length;
  if (str.length < plen) return false;
  // `str` is at least as long as `pat`, so unchecked access is fine
  foreach (immutable idx; 0..plen) {
    if (tolower(str.ptr[idx]) != tolower(pat.ptr[idx])) return false;
  }
  return true;
}
/// Does `str` end with `pat`, ignoring ASCII case? Empty `pat` always matches.
bool endsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  immutable plen = pat.length;
  if (str.length < plen) return false;
  // offset of the tail of `str` that is compared against `pat`
  immutable tailOfs = str.length-plen;
  foreach (immutable idx; 0..plen) {
    if (tolower(str.ptr[tailOfs+idx]) != tolower(pat.ptr[idx])) return false;
  }
  return true;
}
/// Find the first occurrence of `need` in `hay`, starting the search at `stIdx`.
/// This is a bytewise search.
///
/// Returns: index of the match in `hay` (not relative to `stIdx`), or -1
/// if there is no match, `need` is empty, or `stIdx` is outside of `hay`.
ptrdiff_t indexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (need.length == 0 || stIdx >= hay.length) return -1;
  immutable avail = hay.length-stIdx; // bytes left to search in
  if (need.length > avail) return -1;
  auto found = cast(const(char)*)memmem(hay.ptr+stIdx, avail, need.ptr, need.length);
  if (found is null) return -1;
  return cast(ptrdiff_t)(found-hay.ptr);
}


/// Ditto, for a single char.
ptrdiff_t indexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
  // treat the by-value parameter as a one-char needle
  const(char)[] need = (&ch)[0..1];
  return indexOf(hay, need, stIdx);
}
/// Find the last occurrence of `need` in `hay[stIdx..$]`.
/// This is a bytewise search.
///
/// Returns: index of the match in `hay` (not relative to `stIdx`), or -1
/// if there is no match, `need` is empty, or `stIdx` is outside of `hay`.
ptrdiff_t lastIndexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (need.length == 0 || stIdx >= hay.length) return -1;
  immutable avail = hay.length-stIdx; // bytes left to search in
  if (need.length > avail) return -1;
  auto found = cast(const(char)*)memrmem(hay.ptr+stIdx, avail, need.ptr, need.length);
  if (found is null) return -1;
  return cast(ptrdiff_t)(found-hay.ptr);
}


/// Ditto, for a single char.
ptrdiff_t lastIndexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
  // treat the by-value parameter as a one-char needle
  const(char)[] need = (&ch)[0..1];
  return lastIndexOf(hay, need, stIdx);
}
version(test_strex) unittest {
  enum hay = "Alice & Miriel";

  // substring and single char needles
  assert(hay.indexOf(" & ") == 5);
  assert(hay.indexOf(" &!") == -1);
  assert(hay.indexOf("Alice & Miriel was here!") == -1); // needle is longer than haystack
  assert(hay.indexOf('&') == 6);
  char ch = ' ';
  assert(hay.indexOf(ch) == 5);

  // start index
  assert(hay.indexOf("i") == 2);
  assert(hay.indexOf("i", 6) == 9);
  assert(hay.indexOf("i", 12) == -1);

  assert(hay.indexOf("Miriel", 8) == 8);
  assert(hay.indexOf("Miriel", 9) == -1);

  // reverse search
  assert(hay.lastIndexOf("i") == 11);
  assert(hay.lastIndexOf("i", 6) == 11);
  assert(hay.lastIndexOf("i", 11) == 11);
  assert(hay.lastIndexOf("i", 12) == -1);

  // overlapping matches: the rightmost one wins
  assert(lastIndexOf("iiii", "ii") == 2);
}
/// Replace each tab with spaces up to the next tab stop.
/// Tab stops are at every `tabSize` columns; the column counter is reset
/// by '\n' and '\r'. Allocates.
string detab (const(char)[] s, uint tabSize=8) {
  assert(tabSize > 0);

  import std.array : appender;
  auto sink = appender!string();
  uint column = 0;

  foreach (immutable char ch; s) {
    switch (ch) {
      case '\n':
      case '\r':
        column = 0;
        sink.put(ch);
        break;
      case '\t': {
        // number of spaces to reach the next tab stop; always at least one
        immutable fill = tabSize-column%tabSize;
        column += fill;
        foreach (immutable _; 0..fill) sink.put(' ');
        break;
      }
      default:
        ++column;
        sink.put(ch);
        break;
    }
  }

  return sink.data;
}
339 version(test_strex) unittest {
340 assert(detab(" \n\tx", 9) == " \n x");
341 assert(detab(" ab\t asdf ") == " ab asdf ");
/// Split a string into lines, lazily.
///
/// Returns a forward range of slices of `s`. Lines are terminated by
/// "\n", "\r" or "\r\n"; terminators are not included in the slices.
/// The range is empty as soon as no unread text is left, so a terminator
/// at the very end of the text doesn't produce an extra empty line.
auto byLine(T) (T s) if (is(T : const(char)[])) {
  static struct Range(T) {
  nothrow @safe @nogc:
  private:
    T rest; // current line, followed by all the unread text
    size_t lineLen; // length of the current line, without the terminator
    size_t nextPos; // offset of the next line in `rest`
    this (T src) { rest = src; popFront(); }
  public:
    @property bool empty () const { pragma(inline, true); return (rest.length == 0); }
    @property T front () const { pragma(inline, true); return cast(T)rest[0..lineLen]; } // cast away method constness
    auto save () const @trusted {
      Range!T copy = void;
      copy.rest = rest;
      copy.lineLen = lineLen;
      copy.nextPos = nextPos;
      return copy;
    }
    void popFront () @trusted {
      // drop the current line with its terminator
      rest = rest[nextPos..$];
      lineLen = nextPos = 0;
      immutable total = rest.length;
      for (; nextPos < total; ++nextPos) {
        immutable char ch = rest.ptr[nextPos];
        if (ch == '\n') {
          lineLen = nextPos++;
          return;
        }
        if (ch == '\r') {
          lineLen = nextPos++;
          // "\r\n" is one terminator
          if (nextPos < total && rest.ptr[nextPos] == '\n') ++nextPos;
          return;
        }
      }
      // no terminator: the last line takes everything
      lineLen = nextPos;
    }
  }
  return Range!T(s);
}
380 version(test_strex) unittest {
381 enum s = q{
382 import std.stdio;
383 void main() {
384 writeln("Hello");
387 enum ugly = q{
388 import std.stdio;
389 void main() {
390 writeln("Hello");
394 foreach (/+auto+/ line; s.byLine) {
395 import std.stdio;
396 writeln("LN: [", line, "]");
399 foreach (/+auto+/ line; ugly.byLine) {
400 import std.stdio;
401 writeln("LN: [", line, "]");
/// Remove the common indent from all lines of `s`.
///
/// The string should be detabbed: a tab in the leading whitespace of a
/// scanned line triggers `assert(0)`. Lines that contain nothing but
/// chars with codes up to space are emitted as empty lines. Each emitted
/// line is terminated with '\n'. Allocates.
string outdentAll (const(char)[] s) {
  import std.array : appender;

  // pass 1: find the smallest indent among the non-blank lines
  uint minIndent = uint.max;
  foreach (/*auto*/ line; s.byLine) {
    uint pos = 0;
    for (; pos < line.length && line.ptr[pos] <= ' '; ++pos) {
      if (line.ptr[pos] == '\t') assert(0, "can't outdent shit with tabs");
    }
    if (pos >= line.length) continue; // blank line, it doesn't count
    if (pos < minIndent) minIndent = pos;
    if (pos == 0) break; // the indent cannot become smaller
  }

  // pass 2: cut `minIndent` chars from each non-blank line
  auto sink = appender!string();
  foreach (/*auto*/ line; s.byLine) {
    bool blank = true;
    foreach (immutable char ch; line) {
      if (ch > ' ') { blank = false; break; }
    }
    if (!blank) sink.put(line[minIndent..$]);
    sink.put('\n');
  }

  return sink.data;
}
437 version(test_strex) unittest {
438 enum pretty = q{
439 import std.stdio;
440 void main() {
441 writeln("Hello");
443 }.outdentAll;
445 enum ugly = q{
446 import std.stdio;
447 void main() {
448 writeln("Hello");
453 import std.stdio;
454 assert(pretty == ugly);
458 //From: Yahoo Groups <confirm-s2-2ny0qbq23nljzefbilh5vpjrg1pik5hf-ketmar=ketmar.no-ip.org@yahoogroups.com>
// Is `ch` allowed in the part of an email address before '@'?
// Allowed: ASCII letters and digits, `_+-=.$!%^&()?`, and all chars with codes >= 128.
private bool isValidEmailNameChar (char ch) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (ch <= ' ') return false;
  if (ch >= 128) return true; // why not?
  if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) return true;
  if (ch == '_' || ch == '+' || ch == '-' || ch == '=' || ch == '.' || ch == '$') return true;
  // why not?
  return (ch == '!' || ch == '%' || ch == '^' || ch == '&' || ch == '(' || ch == ')' || ch == '?');
}


// Is `ch` allowed in the part of an email address after '@'?
// Allowed: ASCII letters and digits, '-' and '.'.
private bool isValidEmailHostChar (char ch) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (ch <= ' ' || ch >= 127) return false;
  if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) return true;
  return (ch == '-' || ch == '.');
}


/// Simple email address sanity check: non-empty name part of valid name
/// chars, then '@', then non-empty host part of valid host chars.
bool isGoodEmail (const(char)[] s) nothrow @trusted @nogc {
  if (s.length == 0 || s.ptr[0] == '@') return false;
  // scan the name part, up to '@'
  size_t pos = 0;
  while (pos < s.length && s.ptr[pos] != '@') {
    if (!isValidEmailNameChar(s.ptr[pos])) return false;
    ++pos;
  }
  if (pos >= s.length) return false; // no doggy
  assert(s.ptr[pos] == '@');
  // everything after the first '@' is the host part
  auto host = s[pos+1..$];
  if (host.length == 0) return false;
  foreach (immutable char ch; host) {
    if (!isValidEmailHostChar(ch)) return false;
  }
  return true;
}
pure nothrow @system @nogc:
version(linux) {
  // provided by the C library
  extern(C) inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen);
  extern(C) inout(void)* memrchr (inout(void)* s, int ch, size_t slen);
} else {
  /// Find the first occurrence of `needle` in `haystack` (both are byte
  /// blocks). Returns `null` if there is no match, or if `needle` is empty.
  inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
    import core.stdc.string : memcmp;
    // size_t is unsigned, so check the lengths before subtracting
    if (needlelen == 0 || needlelen > haystacklen) return null;
    auto cur = cast(const(ubyte)*)haystack;
    // `left` is the number of start positions that are not checked yet
    for (size_t left = haystacklen-needlelen+1; left != 0; --left, ++cur) {
      if (memcmp(cur, needle, needlelen) == 0) return cast(typeof(return))cur;
    }
    return null;
  }

  /// Find the last byte equal to `ch` (only the low 8 bits are used)
  /// in `haystack`. Returns `null` if there is no such byte.
  inout(void)* memrchr (inout(void)* haystack, int ch, size_t haystacklen) {
    if (haystacklen == 0) return null;
    auto base = cast(const(ubyte)*)haystack;
    immutable int want = ch&0xff;
    // walk from the last byte to the first one
    for (size_t idx = haystacklen; idx-- > 0; ) {
      if (base[idx] == want) return cast(typeof(return))(base+idx);
    }
    return null;
  }
}
/// Find the last occurrence of `needle` in `haystack` (both are byte blocks).
///
/// Returns: pointer to the match, or `null` if there is no match, or
/// if `needle` is empty.
inout(void)* memrmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
  import core.stdc.string : memcmp;
  // size_t is unsigned, so check the lengths before subtracting
  if (needlelen == 0 || needlelen > haystacklen) return null;
  auto h = cast(const(ubyte)*)haystack;
  auto n = cast(const(ubyte)*)needle;
  // `len` is the number of positions where a match can still start
  size_t len = haystacklen-needlelen+1;
  while (len > 0) {
    // find the rightmost candidate by the first byte of the needle
    auto ff = cast(const(ubyte)*)memrchr(h, *n, len);
    if (ff is null) break;
    if (memcmp(ff, n, needlelen) == 0) return cast(typeof(return))ff;
    // not a match: continue with the positions to the left of the candidate
    len = cast(size_t)(ff-h);
  }
  return null;
}