1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // Bit masks for each code point under U+0100, for fast lookup.
// NOTE(review): this declaration list appears to have lost lines. The embedded
// line numbers skip 8, 11 and 19-21, no enclosing const header or closing
// parenthesis is visible, and pN and pLmask, which IsNumber and IsLetter
// reference further down, are not declared here. Because the values are
// derived from iota, the position of any missing entry matters; restore the
// list from the complete file before relying on these masks.
9 pC
= 1 << iota // a control character.
10 pP
// a punctuation character.
12 pS
// a symbolic character.
13 pZ
// a spacing character.
14 pLu
// an upper-case letter.
15 pLl
// a lower-case letter.
16 pp
// a printable character according to Go's definition.
17 pg
= pp | pZ
// a graphical character according to the Unicode definition.
18 pLo
= pLl | pLu
// a letter that is neither upper nor lower case.
// (Encoded as both case bits set at once.)
22 // GraphicRanges defines the set of graphic characters according to Unicode.
// NOTE(review): the elements and closing brace of this literal are not visible
// in this excerpt. IsGraphic's comment lists categories L, M, N, P, S and Zs;
// presumably those are the tables held here — confirm against the complete file.
23 var GraphicRanges
= []*RangeTable
{
27 // PrintRanges defines the set of printable characters according to Go.
28 // ASCII space, U+0020, is handled separately.
// NOTE(review): the elements and closing brace of this literal are not visible
// in this excerpt. IsPrint's comment lists categories L, M, N, P and S;
// presumably those are the tables held here — confirm against the complete file.
29 var PrintRanges
= []*RangeTable
{
33 // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
34 // Such characters include letters, marks, numbers, punctuation, symbols, and
35 // spaces, from categories L, M, N, P, S, Zs.
36 func IsGraphic(r rune
) bool {
37 // We convert to uint32 to avoid the extra test for negative,
38 // and in the index we convert to uint8 to avoid the range check.
39 if uint32(r
) <= MaxLatin1
{
40 return properties
[uint8(r
)]&pg
!= 0
42 return In(r
, GraphicRanges
...)
45 // IsPrint reports whether the rune is defined as printable by Go. Such
46 // characters include letters, marks, numbers, punctuation, symbols, and the
47 // ASCII space character, from categories L, M, N, P, S and the ASCII space
48 // character. This categorization is the same as IsGraphic except that the
49 // only spacing character is ASCII space, U+0020.
50 func IsPrint(r rune
) bool {
51 if uint32(r
) <= MaxLatin1
{
52 return properties
[uint8(r
)]&pp
!= 0
54 return In(r
, PrintRanges
...)
57 // IsOneOf reports whether the rune is a member of one of the ranges.
58 // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
// NOTE(review): only the signature and the loop header are visible in this
// excerpt; the loop body, the final return and the closing braces are missing
// (the embedded line numbers jump from 60 to 68). Restore them from the
// complete file rather than reconstructing the membership test by hand.
59 func IsOneOf(ranges
[]*RangeTable
, r rune
) bool {
60 for _
, inside
:= range ranges
{
68 // In reports whether the rune is a member of one of the ranges.
// NOTE(review): only the signature and the loop header are visible in this
// excerpt; the loop body, the final return and the closing braces are missing
// (the embedded line numbers jump from 70 to 78). Restore them from the
// complete file rather than reconstructing the membership test by hand.
69 func In(r rune
, ranges
...*RangeTable
) bool {
70 for _
, inside
:= range ranges
{
78 // IsControl reports whether the rune is a control character.
79 // The C (Other) Unicode category includes more code points
80 // such as surrogates; use Is(C, r) to test for them.
81 func IsControl(r rune
) bool {
82 if uint32(r
) <= MaxLatin1
{
83 return properties
[uint8(r
)]&pC
!= 0
85 // All control characters are < MaxLatin1.
89 // IsLetter reports whether the rune is a letter (category L).
90 func IsLetter(r rune
) bool {
91 if uint32(r
) <= MaxLatin1
{
92 return properties
[uint8(r
)]&(pLmask
) != 0
94 return isExcludingLatin(Letter
, r
)
97 // IsMark reports whether the rune is a mark character (category M).
98 func IsMark(r rune
) bool {
99 // There are no mark characters in Latin-1.
100 return isExcludingLatin(Mark
, r
)
103 // IsNumber reports whether the rune is a number (category N).
104 func IsNumber(r rune
) bool {
105 if uint32(r
) <= MaxLatin1
{
106 return properties
[uint8(r
)]&pN
!= 0
108 return isExcludingLatin(Number
, r
)
111 // IsPunct reports whether the rune is a Unicode punctuation character
// NOTE(review): this excerpt is incomplete. The rest of the sentence above
// (embedded line 112) is missing, and after the Latin-1 table lookup the
// closing brace, the fallback for runes above MaxLatin1 and the end of the
// function (embedded lines 116-119) are not visible. Restore them from the
// complete file; the fallback callee cannot be determined from what is shown.
113 func IsPunct(r rune
) bool {
114 if uint32(r
) <= MaxLatin1
{
115 return properties
[uint8(r
)]&pP
!= 0
120 // IsSpace reports whether the rune is a space character as defined
121 // by Unicode's White Space property; in the Latin-1 space
123 // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
124 // Other definitions of spacing characters are set by category
125 // Z and property Pattern_White_Space.
126 func IsSpace(r rune
) bool {
127 // This property isn't the same as Z; special-case it.
128 if uint32(r
) <= MaxLatin1
{
130 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
135 return isExcludingLatin(White_Space
, r
)
138 // IsSymbol reports whether the rune is a symbolic character.
139 func IsSymbol(r rune
) bool {
140 if uint32(r
) <= MaxLatin1
{
141 return properties
[uint8(r
)]&pS
!= 0
143 return isExcludingLatin(Symbol
, r
)