cosmetix
[iv.d.git] / strex.d
blobe4c958d3e92f90958ff78cc79ef2269a97fd8b8a
/* Invisible Vector Library
 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
 * Understanding is not required. Only obedience.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
18 // some string operations: quoting, `indexOf()` for non-utf8
19 module iv.strex /*is aliced*/;
/// Quote a string: wrap it in double quotes and escape all special chars,
/// so the result forms a valid D string literal.
/// Allocates.
string quote (const(char)[] s) {
  import std.array : appender;
  import std.format : FormatSpec, formatElement;
  FormatSpec!char spec; // default-constructed spec is plain 's'
  auto sink = appender!string();
  // element formatting of a char array emits the quoted, escaped form
  formatElement(sink, s, spec);
  return sink.data;
}
/// Convert an integral number to decimal text with a separator inserted
/// between every group of three digits (`1234567` -> `1,234,567`).
///
/// Params:
///   dest = buffer that receives the text; if `null`, a new buffer is allocated
///   nn = number to format
///   comma = separator char put between digit groups
///
/// Returns:
///   slice of `dest` with the text; if `dest` is too short, only the
///   leading `dest.length` chars of the text are stored.
char[] intWithCommas(T) (char[] dest, T nn, char comma=',') if (__traits(isIntegral, T)) {
  static if (__traits(isUnsigned, T)) {
    enum neg = false;
    ulong n = nn;
  } else {
    immutable bool neg = (nn < 0);
    // take the magnitude in unsigned arithmetic: plain `-nn` overflows for
    // `T.min` (most visibly `long.min`), while `0-n` in `ulong` is exact
    ulong n = cast(ulong)cast(long)nn;
    if (neg) n = 0UL-n;
  }

  // digits are produced right-to-left into the tail of a scratch buffer
  char[256] buf = void;
  int bpos = cast(int)buf.length;
  int leftToComma = 3; // digits left before the next separator
  do {
    if (leftToComma-- == 0) { buf[--bpos] = comma; leftToComma = 2; }
    buf[--bpos] = cast(char)('0'+n%10);
  } while ((n /= 10) != 0);
  if (neg) buf[--bpos] = '-';

  auto len = buf.length-bpos;
  if (dest is null) dest = new char[](len);
  if (len > dest.length) len = dest.length; // truncate to what fits
  dest[0..len] = buf[bpos..bpos+len];
  return dest[0..len];
}

/// Ditto, but always allocates the result.
char[] intWithCommas(T) (T nn, char comma=',') if (__traits(isIntegral, T)) { return intWithCommas(null, nn, comma); }
// ASCII-only character helpers; chars outside the tested ranges are never matched/changed

/// convert 'A'..'Z' to 'a'..'z', return other chars as is
char tolower (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return cast(char)(ch >= 'A' && ch <= 'Z' ? ch+('a'-'A') : ch); }
/// convert 'a'..'z' to 'A'..'Z', return other chars as is
char toupper (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return cast(char)(ch >= 'a' && ch <= 'z' ? ch-('a'-'A') : ch); }

/// is `ch` in 'a'..'z'?
bool islower (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return !(ch < 'a' || ch > 'z'); }
/// is `ch` in 'A'..'Z'?
bool isupper (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return !(ch < 'A' || ch > 'Z'); }

/// is `ch` an ASCII letter?
bool isalpha (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')); }
/// is `ch` in '0'..'9'?
bool isdigit (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return !(ch < '0' || ch > '9'); }
/// is `ch` an ASCII letter or a decimal digit?
bool isalnum (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')); }
/// is `ch` a hexadecimal digit (either case)?
bool isxdigit (char ch) pure nothrow @trusted @nogc { pragma(inline, true); return ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')); }
/// Get the numeric value of digit `ch` in the given `base`.
/// Letters (either case) stand for values 10 and up, so bases up to 36 work.
///
/// Returns: digit value in `[0..base)`, or -1 if `ch` is not a digit in `base`.
int digitInBase (char ch, int base=10) pure nothrow @trusted @nogc {
  pragma(inline, true);
  return
    ch >= '0' && ch <= '9' && ch-'0' < base ? ch-'0' :
    // upper bounds are inclusive, so 'Z'/'z' (value 35) are valid in base 36
    base > 10 && ch >= 'A' && ch <= 'Z' && ch-'A'+10 < base ? ch-'A'+10 :
    base > 10 && ch >= 'a' && ch <= 'z' && ch-'a'+10 < base ? ch-'a'+10 :
    -1;
}
// ascii only
/// Case-insensitive equality test; only ASCII letters are case-folded.
bool strEquCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
  immutable len = s0.length;
  if (len != s1.length) return false;
  if (__ctfe) {
    // compile-time path: plain (checked) indexing
    foreach (immutable pos; 0..len) {
      if (s0[pos].tolower != s1[pos].tolower) return false;
    }
  } else {
    // run-time path: lengths are known equal, so index `s1` via `.ptr`
    foreach (immutable pos, char c0; s0) {
      if (c0.tolower != s1.ptr[pos].tolower) return false;
    }
  }
  return true;
}
// ascii only
/// Case-insensitive three-way comparison; only ASCII letters are case-folded.
///
/// The common prefix is compared char by char; if it is equal,
/// the shorter string sorts first.
///
/// Returns: -1 if `s0 < s1`, 1 if `s0 > s1`, 0 if they are equal.
int strCmpCI (const(char)[] s0, const(char)[] s1) pure nothrow @trusted @nogc {
  auto slen = s0.length;
  if (slen > s1.length) slen = s1.length;
  foreach (immutable idx, char c0; s0[0..slen]) {
    c0 = c0.tolower;
    char c1;
    if (__ctfe) {
      c1 = s1[idx].tolower;
    } else {
      // `idx < slen <= s1.length`, so unchecked access is in bounds
      c1 = s1.ptr[idx].tolower;
    }
    if (c0 < c1) return -1;
    if (c0 > c1) return 1;
  }
  // common prefix is equal: decide by length
  if (s0.length < s1.length) return -1;
  if (s0.length > s1.length) return 1;
  return 0;
}
/// Strip leading and trailing chars with codes `<= ' '`; returns a slice of `s`.
inout(char)[] xstrip (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t lo = 0;
  size_t hi = s.length;
  while (lo < hi && s.ptr[lo] <= ' ') ++lo;
  while (hi > lo && s[hi-1] <= ' ') --hi;
  return s[lo..hi];
}
/// Strip leading chars with codes `<= ' '`; returns a slice of `s`.
inout(char)[] xstripleft (inout(char)[] s) pure nothrow @trusted @nogc {
  size_t skip = 0;
  while (skip < s.length && s.ptr[skip] <= ' ') ++skip;
  return s[skip..$];
}
/// Strip trailing chars with codes `<= ' '`; returns a slice of `s`.
inout(char)[] xstripright (inout(char)[] s) pure nothrow @trusted @nogc {
  auto end = s.length;
  while (end > 0 && s[end-1] <= ' ') --end;
  return s[0..end];
}
/// Does `str` begin with `pat` (exact byte comparison)? An empty `pat` always matches.
bool startsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  if (str.length < pat.length) return false;
  return (str[0..pat.length] == pat);
}
/// Does `str` end with `pat` (exact byte comparison)? An empty `pat` always matches.
bool endsWith (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  if (str.length < pat.length) return false;
  return (str[$-pat.length..$] == pat);
}
/// Does `str` begin with `pat`, ignoring ASCII letter case?
bool startsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  immutable plen = pat.length;
  if (plen > str.length) return false;
  // `plen <= str.length`, so unchecked access stays inside both strings
  foreach (immutable i; 0..plen) {
    if (tolower(str.ptr[i]) != tolower(pat.ptr[i])) return false;
  }
  return true;
}
/// Does `str` end with `pat`, ignoring ASCII letter case?
bool endsWithCI (const(char)[] str, const(char)[] pat) pure nothrow @trusted @nogc {
  immutable plen = pat.length;
  if (plen > str.length) return false;
  // the tail of `str` that has to match starts here
  immutable base = str.length-plen;
  foreach (immutable i; 0..plen) {
    if (tolower(str.ptr[base+i]) != tolower(pat.ptr[i])) return false;
  }
  return true;
}
/// Find the first occurrence of `need` in `hay`, starting the search at `stIdx`.
///
/// Returns: index of the match relative to the start of `hay`, or -1 when
///          nothing is found, `need` is empty, or `stIdx` is out of range.
ptrdiff_t indexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (need.length == 0 || stIdx >= hay.length) return -1;
  immutable tail = hay.length-stIdx; // cannot wrap: `stIdx < hay.length`
  if (need.length > tail) return -1;
  auto res = memmem(hay.ptr+stIdx, tail, need.ptr, need.length);
  if (res is null) return -1;
  return cast(ptrdiff_t)(res-hay.ptr);
}

/// Ditto, for a single char.
ptrdiff_t indexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
  // view the by-value char as a one-element needle
  const(char)[] one = (&ch)[0..1];
  return indexOf(hay, one, stIdx);
}
/// Find the last occurrence of `need` in `hay[stIdx..$]`.
///
/// Returns: index of the match relative to the start of `hay`, or -1 when
///          nothing is found, `need` is empty, or `stIdx` is out of range.
ptrdiff_t lastIndexOf (const(char)[] hay, const(char)[] need, size_t stIdx=0) pure nothrow @trusted @nogc {
  if (need.length == 0 || stIdx >= hay.length) return -1;
  immutable tail = hay.length-stIdx; // cannot wrap: `stIdx < hay.length`
  if (need.length > tail) return -1;
  auto res = memrmem(hay.ptr+stIdx, tail, need.ptr, need.length);
  if (res is null) return -1;
  return cast(ptrdiff_t)(res-hay.ptr);
}

/// Ditto, for a single char.
ptrdiff_t lastIndexOf (const(char)[] hay, char ch, size_t stIdx=0) pure nothrow @trusted @nogc {
  // view the by-value char as a one-element needle
  const(char)[] one = (&ch)[0..1];
  return lastIndexOf(hay, one, stIdx);
}
version(test_strex) unittest {
  // forward search: string needles
  assert(indexOf("Alice & Miriel", " & ") == 5);
  assert(indexOf("Alice & Miriel", " &!") == -1);
  assert(indexOf("Alice & Miriel", "Alice & Miriel was here!") == -1);

  // forward search: char needles (literal and variable)
  assert(indexOf("Alice & Miriel", '&') == 6);
  char ch = ' ';
  assert(indexOf("Alice & Miriel", ch) == 5);

  // forward search with a start index
  assert(indexOf("Alice & Miriel", "i") == 2);
  assert(indexOf("Alice & Miriel", "i", 6) == 9);
  assert(indexOf("Alice & Miriel", "i", 12) == -1);

  assert(indexOf("Alice & Miriel", "Miriel", 8) == 8);
  assert(indexOf("Alice & Miriel", "Miriel", 9) == -1);

  // backward search, with and without a start index
  assert(lastIndexOf("Alice & Miriel", "i") == 11);
  assert(lastIndexOf("Alice & Miriel", "i", 6) == 11);
  assert(lastIndexOf("Alice & Miriel", "i", 11) == 11);
  assert(lastIndexOf("Alice & Miriel", "i", 12) == -1);

  // overlapping matches: the rightmost one wins
  assert(lastIndexOf("iiii", "ii") == 2);
}
/// Replace each tab with spaces up to the next tab stop.
/// Columns are counted in chars and reset on '\n' and '\r'.
/// Allocates.
///
/// Params:
///   s = source text
///   tabSize = distance between tab stops; must be positive
string detab (const(char)[] s, uint tabSize=8) {
  assert(tabSize > 0);

  import std.array : appender;
  auto res = appender!string();
  uint col = 0;

  foreach (char ch; s) {
    switch (ch) {
      case '\n', '\r':
        // line break: restart column counting
        col = 0;
        res.put(ch);
        break;
      case '\t':
        // always at least one space, up to the next multiple of `tabSize`
        immutable pad = tabSize-col%tabSize;
        foreach (immutable _; 0..pad) res.put(' ');
        col += pad;
        break;
      default:
        ++col;
        res.put(ch);
        break;
    }
  }

  return res.data;
}
version(test_strex) unittest {
  // '\n' resets the column, so the tab expands to a full 9-column stop
  assert(detab(" \n\tx", 9) == " \n         x");
  // tab at column 3 pads with 5 spaces up to column 8
  assert(detab(" ab\t asdf ") == " ab      asdf ");
}
/// Split a string into lines, lazily.
/// Returns a forward range of slices of `s`; line terminators ("\n", "\r",
/// "\r\n") are not included in the yielded lines.
auto byLine(T) (T s) if (is(T : const(char)[])) {
  static struct Range(T) {
  nothrow @safe @nogc:
  private:
    T s; // unprocessed text; the current line is `s[0..llen]`
    size_t llen, npos; // current line length; offset of the next line in `s`
    this (T as) { s = as; popFront(); }
  public:
    @property bool empty () const { pragma(inline, true); return (s.length == 0); }
    @property T front () const { pragma(inline, true); return cast(T)s[0..llen]; } // fuckin' const!
    auto save () const @trusted {
      Range!T copy = void;
      copy.s = s;
      copy.llen = llen;
      copy.npos = npos;
      return copy;
    }
    void popFront () @trusted {
      // drop the current line together with its terminator
      s = s[npos..$];
      llen = npos = 0;
      immutable total = s.length;
      for (size_t i = 0; i < total; ++i) {
        immutable char c = s.ptr[i];
        if (c == '\n') {
          llen = i;
          npos = i+1;
          return;
        }
        if (c == '\r') {
          llen = i;
          // swallow "\r\n" as a single terminator
          npos = (total-i > 1 && s.ptr[i+1] == '\n' ? i+2 : i+1);
          return;
        }
      }
      // no terminator: the rest of the text is the last line
      llen = npos = total;
    }
  }
  return Range!T(s);
}
version(test_strex) unittest {
  // indented fixture
  enum s = q{
    import std.stdio;
    void main() {
      writeln("Hello");
    }
  };

  // flush-left fixture
  enum ugly = q{
import std.stdio;
void main() {
  writeln("Hello");
}
};

  // smoke test: dump every line in brackets so terminator handling is visible
  foreach (/+auto+/ line; s.byLine) {
    import std.stdio;
    writeln("LN: [", line, "]");
  }

  foreach (/+auto+/ line; ugly.byLine) {
    import std.stdio;
    writeln("LN: [", line, "]");
  }
}
// string should be detabbed!
/// Remove the common leading indent from all lines of `s`.
/// Blank (whitespace-only) lines are emitted as empty lines, and every
/// output line is terminated with '\n'. Tabs in the indent are a fatal error.
/// Allocates.
string outdentAll (const(char)[] s) {
  import std.array : appender;

  // pass 1: find the smallest indent among non-blank lines
  uint minIndent = uint.max;
  foreach (/*auto*/ line; s.byLine) {
    uint indent = 0;
    for (; indent < line.length && line.ptr[indent] <= ' '; ++indent) {
      if (line.ptr[indent] == '\t') assert(0, "can't outdent shit with tabs");
    }
    if (indent >= line.length) continue; // blank line, doesn't count
    if (indent < minIndent) minIndent = indent;
    if (indent == 0) break; // can't get any smaller
  }

  // pass 2: emit lines with that indent cut off
  auto res = appender!string();
  foreach (/*auto*/ line; s.byLine) {
    uint indent = 0;
    while (indent < line.length && line.ptr[indent] <= ' ') ++indent;
    // only non-blank lines carry text; their indent is at least `minIndent`
    if (indent < line.length) res.put(line[minIndent..$]);
    res.put('\n');
  }

  return res.data;
}
version(test_strex) unittest {
  // indented fixture; the whitespace-only last line becomes an empty line
  enum pretty = q{
    import std.stdio;
    void main() {
      writeln("Hello");
    }
  }.outdentAll;

  // the same text with the common 4-space indent already removed
  enum ugly = q{
import std.stdio;
void main() {
  writeln("Hello");
}

};

  import std.stdio;
  assert(pretty == ugly);
}
389 pure nothrow @system @nogc:
version(linux) {
  // provided by the C library
  extern(C) inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen);
  extern(C) inout(void)* memrchr (inout(void)* s, int ch, size_t slen);
} else {
  /// Find the first occurrence of `needle` in `haystack`.
  /// Returns `null` if there is no match or if `needle` is empty.
  inout(void)* memmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
    import core.stdc.string : memcmp;
    // size_t is unsigned, so check sizes before subtracting
    if (needlelen == 0 || needlelen > haystacklen) return null;
    auto hay = cast(const(ubyte)*)haystack;
    auto pat = cast(const(ubyte)*)needle;
    immutable lastStart = haystacklen-needlelen; // last offset where a match fits
    for (size_t ofs = 0; ofs <= lastStart; ++ofs) {
      if (memcmp(hay+ofs, pat, needlelen) == 0) return cast(typeof(return))(hay+ofs);
    }
    return null;
  }

  /// Find the last byte equal to `ch` (low 8 bits) in `haystack`.
  /// Returns `null` if there is no such byte.
  inout(void)* memrchr (inout(void)* haystack, int ch, size_t haystacklen) {
    auto bytes = cast(const(ubyte)*)haystack;
    immutable int want = ch&0xff;
    // walk backwards; `pos` is one past the byte being examined
    for (size_t pos = haystacklen; pos > 0; --pos) {
      if (bytes[pos-1] == want) return cast(typeof(return))(bytes+(pos-1));
    }
    return null;
  }
}
/// Find the last occurrence of `needle` in `haystack`.
///
/// Returns: pointer to the start of the rightmost match, or `null` if there
///          is no match, `needle` is empty, or `needle` is longer than `haystack`.
inout(void)* memrmem (inout(void)* haystack, size_t haystacklen, inout(void)* needle, size_t needlelen) {
  import core.stdc.string : memcmp;
  // size_t is unsigned, so check sizes before subtracting
  if (needlelen == 0 || needlelen > haystacklen) return null;
  auto h = cast(const(ubyte)*)haystack;
  auto n = cast(const(ubyte)*)needle;
  // a match can only start within the first `len` bytes
  size_t len = haystacklen-needlelen+1;
  while (len > 0) {
    // rightmost candidate: last byte in `h[0..len]` equal to the needle's first byte
    auto ff = cast(const(ubyte)*)memrchr(h, *n, len);
    if (ff is null) break;
    if (memcmp(ff, n, needlelen) == 0) return cast(typeof(return))ff;
    // no match here: continue strictly to the left of this candidate
    len = cast(size_t)(ff-h);
  }
  return null;
}