d: Merge upstream dmd, druntime 26f049fb26, phobos 330d6a4fd.
[official-gcc.git] / libphobos / src / std / ascii.d
blob367c981320a3c36897fbaffd16b8949d290e6a7b
// Written in the D programming language.

/++
    Functions which operate on ASCII characters.

    All of the functions in std.ascii accept Unicode characters but
    effectively ignore them if they're not ASCII. All `isX` functions return
    `false` for non-ASCII characters, and all `toX` functions do nothing
    to non-ASCII characters.

    For functions which operate on Unicode characters, see
    $(MREF std, uni).

$(SCRIPT inhibitQuickIndex = 1;)
$(DIVC quickindex,
$(BOOKTABLE,
$(TR $(TH Category) $(TH Functions))
$(TR $(TD Validation) $(TD
        $(LREF isAlpha)
        $(LREF isAlphaNum)
        $(LREF isASCII)
        $(LREF isControl)
        $(LREF isDigit)
        $(LREF isGraphical)
        $(LREF isHexDigit)
        $(LREF isLower)
        $(LREF isOctalDigit)
        $(LREF isPrintable)
        $(LREF isPunctuation)
        $(LREF isUpper)
        $(LREF isWhite)
))
$(TR $(TD Conversions) $(TD
        $(LREF toLower)
        $(LREF toUpper)
))
$(TR $(TD Constants) $(TD
        $(LREF digits)
        $(LREF fullHexDigits)
        $(LREF hexDigits)
        $(LREF letters)
        $(LREF lowercase)
        $(LREF lowerHexDigits)
        $(LREF newline)
        $(LREF octalDigits)
        $(LREF uppercase)
        $(LREF whitespace)
))
$(TR $(TD Enums) $(TD
        $(LREF ControlChar)
        $(LREF LetterCase)
))
))
    References:
        $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
        $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)

    License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
    Authors:   $(HTTP digitalmars.com, Walter Bright) and
               $(HTTP jmdavisprog.com, Jonathan M Davis)
    Source:    $(PHOBOSSRC std/ascii.d)
  +/
62 module std.ascii;
/// Every hexadecimal digit, uppercase letters first: 0 .. 9, A .. F, a .. f
immutable fullHexDigits = "0123456789ABCDEFabcdef";

/// Hexadecimal digits with uppercase letters: 0 .. 9, A .. F
immutable hexDigits = fullHexDigits[0 .. 16];

/// Hexadecimal digits with lowercase letters: 0 .. 9, a .. f
immutable lowerHexDigits = "0123456789abcdef";

/// Decimal digits: 0 .. 9
immutable digits = hexDigits[0 .. 10];

/// Octal digits: 0 .. 7
immutable octalDigits = digits[0 .. 8];

/// All ASCII letters, uppercase block first: A .. Z, a .. z
immutable letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/// Uppercase ASCII letters: A .. Z (the first 26 entries of `letters`)
immutable uppercase = letters[0 .. 26];

/// Lowercase ASCII letters: a .. z (the last 26 entries of `letters`)
immutable lowercase = letters[26 .. $];

/// ASCII _whitespace: space, tab, vertical tab, carriage return, line feed, form feed
immutable whitespace = " \t\v\r\n\f";
/++
    Selects which letter case to use (for example when formatting
    hexadecimal digits).
  +/
enum LetterCase : bool
{
    /// Upper case letters
    upper = false,

    /// Lower case letters
    lower = true
}
///
@safe unittest
{
    import std.conv : to;

    // The letter case picks the alphabet used for digits above 9.
    const upperHex = 42.to!string(16, LetterCase.upper);
    const lowerHex = 42.to!string(16, LetterCase.lower);

    assert(upperHex == "2A");
    assert(lowerHex == "2a");
}

///
@safe unittest
{
    import std.digest.hmac : hmac;
    import std.digest : toHexString;
    import std.digest.sha : SHA1;
    import std.string : representation;

    // Compute the HMAC first, then render it with lowercase hex digits.
    const digest = "A very long phrase".representation
        .hmac!SHA1("secret".representation);
    const sha1HMAC = digest.toHexString!(LetterCase.lower);

    assert(sha1HMAC == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
}
/++
    All control characters in the ASCII table ($(HTTPS www.asciitable.com, source)).

    The members cover code points 0x00 .. 0x1F plus 0x7F (delete).
  +/
enum ControlChar : char
{
    nul = '\x00', /// Null
    soh = '\x01', /// Start of heading
    stx = '\x02', /// Start of text
    etx = '\x03', /// End of text
    eot = '\x04', /// End of transmission
    enq = '\x05', /// Enquiry
    ack = '\x06', /// Acknowledge
    bel = '\x07', /// Bell
    bs  = '\x08', /// Backspace
    tab = '\x09', /// Horizontal tab
    lf  = '\x0A', /// NL line feed, new line
    vt  = '\x0B', /// Vertical tab
    ff  = '\x0C', /// NP form feed, new page
    cr  = '\x0D', /// Carriage return
    so  = '\x0E', /// Shift out
    si  = '\x0F', /// Shift in
    dle = '\x10', /// Data link escape
    dc1 = '\x11', /// Device control 1
    dc2 = '\x12', /// Device control 2
    dc3 = '\x13', /// Device control 3
    dc4 = '\x14', /// Device control 4
    nak = '\x15', /// Negative acknowledge
    syn = '\x16', /// Synchronous idle
    etb = '\x17', /// End of transmission block
    can = '\x18', /// Cancel
    em  = '\x19', /// End of medium
    sub = '\x1A', /// Substitute
    esc = '\x1B', /// Escape
    fs  = '\x1C', /// File separator
    gs  = '\x1D', /// Group separator
    rs  = '\x1E', /// Record separator
    us  = '\x1F', /// Unit separator
    del = '\x7F'  /// Delete
}
///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison, std.algorithm.searching, std.range, std.traits;

    // Each member occupies a single char, since all of ASCII fits in one.
    static assert(ControlChar.ack.sizeof == 1);

    // Apart from del, the members are the consecutive code points 0 .. 31.
    static assert(EnumMembers!ControlChar.only.until(ControlChar.del).equal(iota(32)));

    // Members that have a named escape sequence compare equal to it.
    static assert('\0' == ControlChar.nul);
    static assert('\a' == ControlChar.bel);
    static assert('\b' == ControlChar.bs);
    static assert('\f' == ControlChar.ff);
    static assert('\n' == ControlChar.lf);
    static assert('\r' == ControlChar.cr);
    static assert('\t' == ControlChar.tab);
    static assert('\v' == ControlChar.vt);
}

///
@safe pure nothrow unittest
{
    import std.conv;

    // The control character table can be used in place of hex codes.
    with (ControlChar)
    {
        const joined = text("Phobos", us, "Deimos", us, "Tango", rs);
        assert(joined == "Phobos\x1FDeimos\x1FTango\x1E");
    }
}
/// Newline sequence for this system.
version (Windows)
{
    immutable newline = "\r\n";
}
else version (Posix)
{
    immutable newline = "\n";
}
else
{
    static assert(0, "Unsupported OS");
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is an ASCII letter or digit (0 .. 9, a .. z, A .. Z).
  +/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    if (c >= '0' && c <= '9')
        return true;

    // Setting bit 0x20 maps 'A' .. 'Z' onto 'a' .. 'z', so a single range
    // test covers both letter cases.
    const folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
///
@safe pure nothrow @nogc unittest
{
    // Letters and digits are accepted; symbols are not.
    assert( isAlphaNum('A'));
    assert( isAlphaNum('1'));
    assert(!isAlphaNum('#'));

    // N.B.: non-ASCII Unicode alphanumerics are rejected (U+00E1, a with acute):
    assert(!isAlphaNum('\u00E1'));
}

@safe unittest
{
    import std.range;

    foreach (accepted; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
    {
        assert(isAlphaNum(accepted));
    }

    foreach (blank; whitespace)
    {
        assert(!isAlphaNum(blank));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is an ASCII letter (A .. Z, a .. z).
  +/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Two plain range tests; kept simple so the optimizer is free to
    // combine them.
    if ('A' <= c && c <= 'Z')
        return true;

    return 'a' <= c && c <= 'z';
}
///
@safe pure nothrow @nogc unittest
{
    // ASCII letters are accepted; digits and symbols are not.
    assert( isAlpha('A'));
    assert(!isAlpha('1'));
    assert(!isAlpha('#'));

    // N.B.: non-ASCII Unicode alphabetic characters are rejected (U+00E1):
    assert(!isAlpha('\u00E1'));
}

@safe unittest
{
    import std.range;

    foreach (letter; chain(letters, lowercase, uppercase))
    {
        assert(isAlpha(letter));
    }

    foreach (other; chain(digits, octalDigits, whitespace))
    {
        assert(!isAlpha(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is a lowercase ASCII letter (a .. z).
  +/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    const bool belowRange = c < 'a';
    const bool aboveRange = c > 'z';
    return !(belowRange || aboveRange);
}
///
@safe pure nothrow @nogc unittest
{
    assert( isLower('a'));
    assert(!isLower('A'));
    assert(!isLower('#'));

    // N.B.: non-ASCII Unicode lowercase letters are rejected
    assert(!isLower('\u00E1')); // U+00E1, a with acute
    assert(!isLower('\u00C1')); // U+00C1, A with acute
}

@safe unittest
{
    import std.range;

    foreach (lower; lowercase)
    {
        assert(isLower(lower));
    }

    foreach (other; chain(digits, uppercase, whitespace))
    {
        assert(!isLower(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is an uppercase ASCII letter (A .. Z).
  +/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    if (c < 'A')
        return false;

    return c <= 'Z';
}
///
@safe pure nothrow @nogc unittest
{
    assert( isUpper('A'));
    assert(!isUpper('a'));
    assert(!isUpper('#'));

    // N.B.: non-ASCII Unicode uppercase letters are rejected
    assert(!isUpper('\u00E1')); // U+00E1, a with acute
    assert(!isUpper('\u00C1')); // U+00C1, A with acute
}

@safe unittest
{
    import std.range;

    foreach (upper; uppercase)
    {
        assert(isUpper(upper));
    }

    foreach (other; chain(digits, lowercase, whitespace))
    {
        assert(!isUpper(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is an ASCII decimal digit (0 .. 9).
  +/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    return !(c < '0' || c > '9');
}
///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // The full-width digits are written as escapes so they cannot be
    // mistaken for, or normalized into, the ASCII digits '0' and '4'
    // (for which these assertions would fail).
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}

@safe unittest
{
    import std.range;

    // Every entry of `digits` is a digit ...
    foreach (c; digits)
        assert(isDigit(c));

    // ... while letters and whitespace never are.
    foreach (c; chain(letters, whitespace))
        assert(!isDigit(c));
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is a digit in base 8 (0 .. 7).
  +/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    if (c < '0')
        return false;

    return c <= '7';
}
///
@safe pure nothrow @nogc unittest
{
    // Both ends of the octal range are accepted ...
    assert( isOctalDigit('0'));
    assert( isOctalDigit('7'));

    // ... but the remaining decimal digits, letters and symbols are not.
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}

@safe unittest
{
    import std.range;

    foreach (octal; octalDigits)
    {
        assert(isOctalDigit(octal));
    }

    foreach (other; chain(letters, ['8', '9'], whitespace))
    {
        assert(!isOctalDigit(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is a digit in base 16 (0 .. 9, A .. F, a .. f).
  +/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    if (c >= '0' && c <= '9')
        return true;

    // Setting bit 0x20 maps 'A' .. 'F' onto 'a' .. 'f', so one range test
    // handles both letter cases.
    const folded = c | 0x20;
    return folded >= 'a' && folded <= 'f';
}
///
@safe pure nothrow @nogc unittest
{
    assert( isHexDigit('0'));
    assert( isHexDigit('A'));
    assert( isHexDigit('f')); // lowercase hex digits are accepted

    // Letters past F/f and symbols are rejected.
    assert(!isHexDigit('g'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('#'));
}

@safe unittest
{
    import std.range;

    foreach (hex; fullHexDigits)
    {
        assert(isHexDigit(hex));
    }

    // Skip the first six letters (a .. f / A .. F) of each alphabet.
    foreach (other; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
    {
        assert(!isHexDigit(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a whitespace character. That includes the
    space, tab, vertical tab, form feed, carriage return, and linefeed
    characters.
  +/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        // Space plus the contiguous run 0x09 .. 0x0D.
        case ' ', '\t', '\n', '\v', '\f', '\r':
            return true;

        default:
            return false;
    }
}
///
@safe pure nothrow @nogc unittest
{
    assert( isWhite(' '));
    assert( isWhite('\t'));
    assert( isWhite('\n'));

    // Digits, letters and symbols are not whitespace.
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: Does not return true for non-ASCII Unicode whitespace characters.
    static import std.uni;
    const noBreakSpace = '\u00A0';
    assert(std.uni.isWhite(noBreakSpace));
    assert(!isWhite(noBreakSpace)); // std.ascii.isWhite
}

@safe unittest
{
    import std.range;

    foreach (blank; whitespace)
    {
        assert(isWhite(blank));
    }

    foreach (other; chain(digits, letters))
    {
        assert(!isWhite(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether `c` is an ASCII control character
             (code points 0x00 .. 0x1F, or 0x7F).
  +/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    // 0x7F (delete) is the one control character outside the low block.
    if (c == 0x7F)
        return true;

    return c < 0x20;
}
///
@safe pure nothrow @nogc unittest
{
    assert( isControl('\0'));
    assert( isControl('\x12')); // same code point as the octal escape '\022'
    assert( isControl('\n')); // newline is both whitespace and control

    // Space and printable characters are not control characters.
    assert(!isControl(' '));
    assert(!isControl('1'));
    assert(!isControl('a'));
    assert(!isControl('#'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}

@safe unittest
{
    import std.range;

    // Code point 127 (delete) and the whole block 0 .. 31 are control characters.
    assert(isControl(127));
    foreach (dchar low; 0 .. 32)
    {
        assert(isControl(low));
    }

    foreach (other; chain(digits, letters, [' ']))
    {
        assert(!isControl(other));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a punctuation character. That includes
    all ASCII characters which are not control characters, letters, digits, or
    whitespace.
  +/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // Anything outside '!' .. '~' is a control character, the space, or
    // non-ASCII, and therefore never punctuation.
    if (c < '!' || c > '~')
        return false;

    return !isAlphaNum(c);
}
///
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = figure dash)
}

@safe unittest
{
    // Across all of ASCII, punctuation is exactly what is left after
    // removing control characters, letters, digits and the space.
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || isAlphaNum(c) || c == ' ')
            assert(!isPunctuation(c));
        else
            assert(isPunctuation(c));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a printable character other than the
    space character.
  +/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    // Strictly between the space (0x20) and delete (0x7F), i.e. '!' .. '~'.
    return c > ' ' && c < '\x7F';
}
///
@safe pure nothrow @nogc unittest
{
    assert( isGraphical('1'));
    assert( isGraphical('a'));
    assert( isGraphical('#'));
    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such (U+00E1).
    assert(!isGraphical('\u00E1'));
}

@safe unittest
{
    // Within ASCII, everything except control characters and the space
    // is graphical.
    foreach (dchar ch; 0 .. 128)
    {
        const excluded = isControl(ch) || ch == ' ';
        assert(isGraphical(ch) != excluded);
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is a printable character - including the
    space character.
  +/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    return !(c < ' ' || c > '~');
}
///
@safe pure nothrow @nogc unittest
{
    assert( isPrintable(' ')); // whitespace is printable
    assert( isPrintable('1'));
    assert( isPrintable('a'));
    assert( isPrintable('#'));
    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized (U+00E1).
    assert(!isPrintable('\u00E1'));
}

@safe unittest
{
    // Within ASCII, a character is printable exactly when it is not a
    // control character.
    foreach (dchar ch; 0 .. 128)
    {
        assert(isPrintable(ch) != isControl(ch));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not `c` is in the ASCII character set - i.e. in the
    range 0 .. 0x7F.
  +/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    // 0x80 is the first code point past the ASCII range.
    return c < 0x80;
}
///
@safe pure nothrow @nogc unittest
{
    assert( isASCII('a'));
    assert(!isASCII('\u00E1')); // U+00E1, a with acute
}

@safe unittest
{
    // The first code point past the ASCII range is rejected ...
    assert(!isASCII(128));

    // ... while every code point below it is accepted.
    foreach (dchar ch; 0 .. 128)
    {
        assert(isASCII(ch));
    }
}
/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to `dchar`.
    In the case where it's a built-in type, or an enum of a built-in type,
    `Unqual!(OriginalType!C)` is returned, whereas if it's a user-defined
    type, `dchar` is returned.

    Returns: The corresponding lowercase letter, if `c` is an uppercase
    ASCII character, otherwise `c` itself.
  +/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : OriginalType;

    // Choose the result type: dchar for non-scalar (user-defined) types,
    // otherwise the unqualified original type of C.
    static if (!__traits(isScalar, C))
        alias Result = dchar;
    else static if (is(immutable OriginalType!C == immutable Base, Base))
        alias Result = Base;

    if (!isUpper(c))
        return cast(Result) c;

    return cast(Result)(cast(Result) c + 'a' - 'A');
}
///
@safe pure nothrow @nogc unittest
{
    assert(toLower('a') == 'a');
    assert(toLower('A') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters are not converted (U+00C1).
    assert(toLower('\u00C1') == '\u00C1');
}

@safe pure nothrow unittest
{
    import std.meta;

    static foreach (Char; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        //CTFE
        static assert(toLower(cast(Char)'a') == 'a');
        static assert(toLower(cast(Char)'A') == 'a');

        // Every uppercase letter maps to the lowercase letter at the same index.
        foreach (idx, upper; uppercase)
            assert(toLower(cast(Char) upper) == lowercase[idx]);

        // Within ASCII, only A .. Z are changed.
        foreach (Char c; 0 .. 128)
        {
            const isUpperLetter = !(c < 'A' || c > 'Z');
            assert((toLower(c) != c) == isUpperLetter);
        }

        // Values from 128 upwards pass through untouched.
        foreach (Char c; 128 .. Char.max)
            assert(toLower(c) == c);
    }
}
/++
    Converts an ASCII letter to uppercase.

    Params: c = Any type which implicitly converts to `dchar`. In the case
    where it's a built-in type, or an enum of a built-in type,
    `Unqual!(OriginalType!C)` is returned, whereas if it's a user-defined
    type, `dchar` is returned.

    Returns: The corresponding uppercase letter, if `c` is a lowercase ASCII
    character, otherwise `c` itself.
  +/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : OriginalType;

    // Choose the result type: dchar for non-scalar (user-defined) types,
    // otherwise the unqualified original type of C.
    static if (!__traits(isScalar, C))
        alias Result = dchar;
    else static if (is(immutable OriginalType!C == immutable Base, Base))
        alias Result = Base;

    if (!isLower(c))
        return cast(Result) c;

    return cast(Result)(cast(Result) c - ('a' - 'A'));
}
///
@safe pure nothrow @nogc unittest
{
    assert(toUpper('a') == 'A');
    assert(toUpper('A') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters are not converted (U+00E1).
    assert(toUpper('\u00E1') == '\u00E1');
}

@safe pure nothrow unittest
{
    import std.meta;

    static foreach (Char; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        //CTFE
        static assert(toUpper(cast(Char)'a') == 'A');
        static assert(toUpper(cast(Char)'A') == 'A');

        // Every lowercase letter maps to the uppercase letter at the same index.
        foreach (idx, lower; lowercase)
            assert(toUpper(cast(Char) lower) == uppercase[idx]);

        // Within ASCII, only a .. z are changed.
        foreach (Char c; 0 .. 128)
        {
            const isLowerLetter = !(c < 'a' || c > 'z');
            assert((toUpper(c) != c) == isLowerLetter);
        }

        // Values from 128 upwards pass through untouched.
        foreach (Char c; 128 .. Char.max)
            assert(toUpper(c) == c);
    }
}
@safe unittest //Test both toUpper and toLower with non-builtin
{
    import std.meta;
    import std.traits;

    //User Defined [Char|Wchar|Dchar]
    static struct UDC { char c; alias c this; }
    static struct UDW { wchar c; alias c this; }
    static struct UDD { dchar c; alias c this; }
    //[Char|Wchar|Dchar] Enum
    enum CE : char {a = 'a', A = 'A'}
    enum WE : wchar {a = 'a', A = 'A'}
    enum DE : dchar {a = 'a', A = 'A'}
    //User Defined [Char|Wchar|Dchar] Enum
    enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
    enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
    enum UDDE : UDD {a = UDD('a'), A = UDD('A')}

    //User defined types with implicit cast to dchar test.
    static foreach (Wrapper; AliasSeq!(UDC, UDW, UDD))
    {
        // Compile-time evaluation
        static assert(toLower(Wrapper('a')) == 'a');
        static assert(toLower(Wrapper('A')) == 'a');
        static assert(toUpper(Wrapper('a')) == 'A');
        static assert(toUpper(Wrapper('A')) == 'A');

        // Run-time evaluation
        assert(toLower(Wrapper('a')) == 'a');
        assert(toLower(Wrapper('A')) == 'a');
    }

    //Various enum tests.
    static foreach (E; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        // Compile-time evaluation
        static assert(toLower(E.a) == 'a');
        static assert(toLower(E.A) == 'a');
        static assert(toUpper(E.a) == 'A');
        static assert(toUpper(E.A) == 'A');

        // Run-time evaluation
        assert(toLower(E.a) == 'a');
        assert(toLower(E.A) == 'a');
        assert(toUpper(E.a) == 'A');
        assert(toUpper(E.A) == 'A');
    }

    //Return value type tests for enum of non-UDT. These should be the original type.
    static foreach (PlainEnum; AliasSeq!(CE, WE, DE))
    {{
        alias Base = OriginalType!PlainEnum;
        static assert(is(typeof(toLower(PlainEnum.init)) == Base));
        static assert(is(typeof(toUpper(PlainEnum.init)) == Base));
    }}

    //Return value tests for UDT and enum of UDT. These should be dchar
    static foreach (UserType; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toLower(UserType.init)) == dchar));
        static assert(is(typeof(toUpper(UserType.init)) == dchar));
    }
}