iv.vfs: don't turn "w+" mode to "r+" mode, lol
[iv.d.git] / strex.d
blob6cf949c112312144f22f6d831e0c3b6a5cc98813
1 /* Invisible Vector Library
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 // some string operations: quoting, `indexOf()` for non-utf8
19 module iv.strex /*is aliced*/;
22 /// quote string: append double quotes, screen all special chars;
23 /// so quoted string forms valid D string literal.
24 /// allocates.
25 string quote (const(char)[] s) {
26 import std.array : appender;
27 import std.format : formatElement, FormatSpec;
28 auto res = appender!string();
29 FormatSpec!char fspc; // defaults to 's'
30 formatElement(res, s, fspc);
31 return res.data;
35 /// convert integral number to number with commas
36 char[] intWithCommas(T) (char[] dest, T nn, char comma=',') if (__traits(isIntegral, T)) {
37 static if (__traits(isUnsigned, T)) {
38 enum neg = false;
39 alias n = nn;
40 } else {
41 bool neg = (nn < 0);
42 static if (T.sizeof < 8) {
43 long n = nn;
44 if (neg) n = -n;
45 if (n < 0) n = T.max;
46 } else {
47 alias n = nn;
48 if (neg) n = -n;
49 if (n < 0) n = T.max; //FIXME
52 char[256] buf = void;
53 int bpos = cast(int)buf.length;
54 int leftToComma = 3;
55 do {
56 if (leftToComma-- == 0) { buf[--bpos] = comma; leftToComma = 2; }
57 buf[--bpos] = cast(char)('0'+n%10);
58 } while ((n /= 10) != 0);
59 if (neg) buf[--bpos] = '-';
60 auto len = buf.length-bpos;
61 if (dest is null) dest = new char[](len);
62 if (len > dest.length) len = dest.length;
63 dest[0..len] = buf[bpos..bpos+len];
64 return dest[0..len];
67 char[] intWithCommas(T) (T nn, char comma=',') if (__traits(isIntegral, T)) { return intWithCommas(null, nn, comma); }
70 char tolower (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return (ch >= 'A' && ch <= 'Z' ? cast(char)(ch-'A'+'a') : ch); }
71 char toupper (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return (ch >= 'a' && ch <= 'z' ? cast(char)(ch-'a'+'A') : ch); }
73 bool islower (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return (ch >= 'a' && ch <= 'z'); }
74 bool isupper (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return (ch >= 'A' && ch <= 'Z'); }
76 bool isalpha (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')); }
77 bool isdigit (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return (ch >= '0' && ch <= '9'); }
78 bool isalnum (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')); }
79 bool isxdigit (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return ((ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f') || (ch >= '0' && ch <= '9')); }
81 int digitInBase (char ch, int base=10) pure nothrow @trusted @nogc {
82 pragma(inline, true);
83 return
84 ch >= '0' && ch <= '9' && ch-'0' < base ? ch-'0' :
85 base > 10 && ch >= 'A' && ch < 'Z' && ch-'A'+10 < base ? ch-'A'+10 :
86 base > 10 && ch >= 'a' && ch < 'z' && ch-'a'+10 < base ? ch-'a'+10 :
87 -1;
91 alias atof = atofd!float; /// very simple atof/atod converter. accepts exponents. returns NaN on error.
92 alias atod = atofd!double; /// very simple atof/atod converter. accepts exponents. returns NaN on error.
94 /// very simple atof/atod converter. accepts exponents.
95 /// returns NaN on error.
96 T atofd(T) (const(char)[] str) pure nothrow @trusted @nogc if (is(T == float) || is(T == double)) {
97 if (str.length == 0) return T.nan; // oops
99 const(char)[] s = str;
100 double res = 0.0, sign = 1.0;
101 bool hasIntPart = false, hasFracPart = false;
103 char peekChar () nothrow @trusted @nogc { pragma(inline, true); return (s.length ? s.ptr[0] : '\0'); }
104 void skipChar () nothrow @trusted @nogc { pragma(inline, true); if (s.length > 0) s = s[1..$]; }
105 char getChar () nothrow @trusted @nogc { char ch = 0; if (s.length > 0) { ch = s.ptr[0]; s = s[1..$]; } return ch; }
107 // optional sign
108 switch (peekChar) {
109 case '-': sign = -1; goto case;
110 case '+': skipChar(); break;
111 default: break;
114 // integer part
115 if (isdigit(peekChar)) {
116 hasIntPart = true;
117 while (isdigit(peekChar)) res = res*10.0+(getChar()-'0');
120 // fractional part.
121 if (peekChar == '.') {
122 skipChar(); // skip '.'
123 if (isdigit(peekChar)) {
124 hasFracPart = true;
125 int divisor = 1;
126 long num = 0;
127 while (isdigit(peekChar)) {
128 divisor *= 10;
129 num = num*10+(getChar()-'0');
131 res += cast(double)num/divisor;
135 // valid number should have integer or fractional part
136 if (!hasIntPart && !hasFracPart) return T.nan;
138 // optional exponent
139 if (peekChar == 'e' || peekChar == 'E') {
140 skipChar(); // skip 'E'
141 // optional sign
142 bool epositive = true;
143 switch (peekChar) {
144 case '-': epositive = false; goto case;
145 case '+': skipChar(); break;
146 default: break;
148 int expPart = 0;
149 while (isdigit(peekChar)) expPart = expPart*10+(getChar()-'0');
150 if (epositive) {
151 foreach (immutable _; 0..expPart) res *= 10.0;
152 } else {
153 foreach (immutable _; 0..expPart) res /= 10.0;
157 return cast(T)(res*sign);
161 // ascii only
162 bool strEquCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
163 if (s0.length != s1.length) return false;
164 foreach (immutable idx, char c0; s0) {
165 if (__ctfe) {
166 if (c0.tolower != s1[idx].tolower) return false;
167 } else {
168 if (c0.tolower != s1.ptr[idx].tolower) return false;
171 return true;
175 // ascii only
176 int strCmpCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
177 auto slen = s0.length;
178 if (slen > s1.length) slen = s1.length;
179 char c1;
180 foreach (immutable idx, char c0; s0[0..slen]) {
181 c0 = c0.tolower;
182 if (__ctfe) {
183 c1 = s1[idx].tolower;
184 } else {
185 c1 = s1.ptr[idx].tolower;
187 if (c0 < c1) return -1;
188 if (c0 > c1) return 1;
190 if (s0.length < s1.length) return -1;
191 if (s0.length > s1.length) return 1;
192 return 0;
194 if (s0.length < s1.length) return -1;
195 if (s0.length > s1.length) return 1;
196 char c1;
197 foreach (immutable idx, char c0; s0) {
198 c0 = c0.tolower;
199 if (__ctfe) {
200 c1 = s1[idx].tolower;
201 } else {
202 c1 = s1.ptr[idx].tolower;
204 if (c0 < c1) return -1;
205 if (c0 > c1) return 1;
207 return 0;
212 inout(char)[] xstrip (inout(char)[] s) pure nothrow @trusted @nogc {
213 while (s.length && s.ptr[0] <= ' ') s = s[1..$];
214 while (s.length && s[$-1] <= ' ') s = s[0..$-1];
215 return s;
219 inout(char)[] xstripleft (inout(char)[] s) pure nothrow @trusted @nogc {
220 while (s.length && s.ptr[0] <= ' ') s = s[1..$];
221 return s;
225 inout(char)[] xstripright (inout(char)[] s) pure nothrow @trusted @nogc {
226 while (s.length && s[$-1] <= ' ') s = s[0..$-1];
227 return s;
231 bool startsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
232 import core.stdc.string : memcmp;
233 if (pat.length > str.length) return false;
234 return (memcmp(str.ptr, pat.ptr, pat.length) == 0);
238 bool endsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
239 import core.stdc.string : memcmp;
240 if (pat.length > str.length) return false;
241 return (memcmp(str.ptr+str.length-pat.length, pat.ptr, pat.length) == 0);
245 bool startsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
246 if (pat.length > str.length) return false;
247 auto s = cast(const(char)*)str.ptr;
248 auto p = cast(const(char)*)pat.ptr;
249 foreach (immutable _; 0..pat.length) if (tolower(*s++) != tolower(*p++)) return false;
250 return true;
254 bool endsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
255 if (pat.length > str.length) return false;
256 auto s = cast(const(char)*)str.ptr+str.length-pat.length;
257 auto p = cast(const(char)*)pat.ptr;
258 foreach (immutable _; 0..pat.length) if (tolower(*s++) != tolower(*p++)) return false;
259 return true;
263 ptrdiff_t indexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
264 if (hay.length <= stIdx || need.length == 0 || need.length > hay.length-stIdx) {
265 return -1;
266 } else {
267 auto res = cast(char*)memmem(hay.ptr+stIdx, hay.length-stIdx, need.ptr, need.length);
268 return (res !is null ? cast(ptrdiff_t)(res-hay.ptr) : -1);
272 ptrdiff_t indexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
273 return indexOf(hay, (&ch)[0..1], stIdx);
277 ptrdiff_t lastIndexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
278 if (hay.length <= stIdx || need.length == 0 || need.length > hay.length-stIdx) {
279 return -1;
280 } else {
281 auto res = cast(char*)memrmem(hay.ptr+stIdx, hay.length-stIdx, need.ptr, need.length);
282 return (res !is null ? cast(ptrdiff_t)(res-hay.ptr) : -1);
286 ptrdiff_t lastIndexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
287 return lastIndexOf(hay, (&ch)[0..1], stIdx);
291 version(test_strex) unittest {
292 assert(indexOf("Alice & Miriel", " & ") == 5);
293 assert(indexOf("Alice & Miriel", " &!") == -1);
294 assert(indexOf("Alice & Miriel", "Alice & Miriel was here!") == -1);
295 assert(indexOf("Alice & Miriel", '&') == 6);
296 char ch = ' ';
297 assert(indexOf("Alice & Miriel", ch) == 5);
299 assert(indexOf("Alice & Miriel", "i") == 2);
300 assert(indexOf("Alice & Miriel", "i", 6) == 9);
301 assert(indexOf("Alice & Miriel", "i", 12) == -1);
303 assert(indexOf("Alice & Miriel", "Miriel", 8) == 8);
304 assert(indexOf("Alice & Miriel", "Miriel", 9) == -1);
306 assert(lastIndexOf("Alice & Miriel", "i") == 11);
307 assert(lastIndexOf("Alice & Miriel", "i", 6) == 11);
308 assert(lastIndexOf("Alice & Miriel", "i", 11) == 11);
309 assert(lastIndexOf("Alice & Miriel", "i", 12) == -1);
311 assert(lastIndexOf("iiii", "ii") == 2);
315 string detab (const(char)[] s, uint tabSize=8) {
316 assert(tabSize > 0);
318 import std.array : appender;
319 auto res = appender!string();
320 uint col = 0;
322 foreach (char ch; s) {
323 if (ch == '\n' || ch == '\r') {
324 col = 0;
325 } else if (ch == '\t') {
326 auto spins = tabSize-col%tabSize;
327 col += spins;
328 while (spins-- > 1) res.put(' ');
329 ch = ' ';
330 } else {
331 ++col;
333 res.put(ch);
336 return res.data;
340 version(test_strex) unittest {
341 assert(detab(" \n\tx", 9) == " \n x");
342 assert(detab(" ab\t asdf ") == " ab asdf ");
346 auto byLine(T) (T s) if (is(T : const(char)[])) {
347 static struct Range(T) {
348 nothrow @safe @nogc:
349 private:
350 T s;
351 size_t llen, npos;
352 this (T as) { s = as; popFront(); }
353 public:
354 @property bool empty () const { pragma(inline, true); return (s.length == 0); }
355 @property T front () const { pragma(inline, true); return cast(T)s[0..llen]; } // fuckin' const!
356 auto save () const @trusted { Range!T res = void; res.s = s; res.llen = llen; res.npos = npos; return res; }
357 void popFront () @trusted {
358 s = s[npos..$];
359 llen = npos = 0;
360 while (npos < s.length) {
361 if (s.ptr[npos] == '\r') {
362 llen = npos;
363 if (s.length-npos > 1 && s.ptr[npos+1] == '\n') ++npos;
364 ++npos;
365 return;
367 if (s.ptr[npos] == '\n') {
368 llen = npos;
369 ++npos;
370 return;
372 ++npos;
374 llen = npos;
377 return Range!T(s);
381 version(test_strex) unittest {
382 enum s = q{
383 import std.stdio;
384 void main() {
385 writeln("Hello");
388 enum ugly = q{
389 import std.stdio;
390 void main() {
391 writeln("Hello");
395 foreach (/+auto+/ line; s.byLine) {
396 import std.stdio;
397 writeln("LN: [", line, "]");
400 foreach (/+auto+/ line; ugly.byLine) {
401 import std.stdio;
402 writeln("LN: [", line, "]");
407 // string should be detabbed!
408 string outdentAll (const(char)[] s) {
409 import std.array : appender;
410 // first calculate maximum indent spaces
411 uint maxspc = uint.max;
412 foreach (/*auto*/ line; s.byLine) {
413 uint col = 0;
414 while (col < line.length && line.ptr[col] <= ' ') {
415 if (line.ptr[col] == '\t') assert(0, "can't outdent shit with tabs");
416 ++col;
418 if (col >= line.length) continue; // empty line, don't care
419 if (col < maxspc) maxspc = col;
420 if (col == 0) break; // nothing to do anymore
423 auto res = appender!string();
424 foreach (/*auto*/ line; s.byLine) {
425 uint col = 0;
426 while (col < line.length && line.ptr[col] <= ' ') ++col;
427 if (col < line.length) {
428 // non-empty line
429 res.put(line[maxspc..$]);
431 res.put('\n');
434 return res.data;
438 version(test_strex) unittest {
439 enum pretty = q{
440 import std.stdio;
441 void main() {
442 writeln("Hello");
444 }.outdentAll;
446 enum ugly = q{
447 import std.stdio;
448 void main() {
449 writeln("Hello");
454 import std.stdio;
455 assert(pretty == ugly);
459 pure nothrow @system @nogc:
460 version(linux) {
461 extern(C) inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen);
462 extern(C) inout(void)* memrchr (inout(void)* s, int ch, size_t slen);
463 } else {
464 inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
465 // size_t is unsigned
466 if (needlelen > haystacklen || needlelen == 0) return null;
467 auto h = cast(const(ubyte)*)haystack;
468 auto n = cast(const(ubyte)*)needle;
469 foreach (immutable i; 0..haystacklen-needlelen+1) {
470 import core.stdc.string : memcmp;
471 if (memcmp(h+i, n, needlelen) == 0) return cast(typeof(return))(h+i);
473 return null;
476 inout(void)* memrchr (inout(void)* haystack, int ch, size_t haystacklen) {
477 // size_t is unsigned
478 if (haystacklen == 0) return null;
479 auto h = cast(const(ubyte)*)haystack;
480 ch &= 0xff;
481 foreach_reverse (immutable idx, ubyte v; h[0..haystacklen]) {
482 if (v == ch) return cast(typeof(return))(h+idx);
484 return null;
488 inout(void)* memrmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
489 if (needlelen > haystacklen) return null;
490 auto h = cast(const(ubyte)*)haystack;
491 const(ubyte)* res = null;
492 // size_t is unsigned
493 if (needlelen > haystacklen || needlelen == 0) return null;
494 version(none) {
495 size_t pos = 0;
496 while (pos < haystacklen-needlelen+1) {
497 auto ff = memmem(haystack+pos, haystacklen-pos, needle, needlelen);
498 if (ff is null) break;
499 res = cast(const(ubyte)*)ff;
500 pos = cast(size_t)(res-haystack)+1;
502 return cast(void*)res;
503 } else {
504 auto n = cast(const(ubyte)*)needle;
505 size_t len = haystacklen-needlelen+1;
506 while (len > 0) {
507 import core.stdc.string : memcmp;
508 auto ff = cast(const(ubyte)*)memrchr(haystack, *n, len);
509 if (ff is null) break;
510 if (memcmp(ff, needle, needlelen) == 0) return cast(void*)ff;
511 //if (ff is h) break;
512 len = cast(size_t)(ff-cast(ubyte*)haystack);
514 return null;