1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This package provides data and functions to test some properties of Unicode code points.
9 MaxRune
= 0x10FFFF // Maximum valid Unicode code point.
10 ReplacementChar
= 0xFFFD // Represents invalid code points.
14 // Range represents a range of Unicode code points. The range runs from Lo to Hi
15 // inclusive and has the specified stride.
22 // CaseRange represents a range of Unicode code points for simple (one
23 // code point to one code point) case conversion.
24 // The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas
25 // are the number to add to the code point to reach the code point for a
26 // different case for that character. They may be negative. If zero, it
27 // means the character is in the corresponding case. There is a special
28 // case representing sequences of alternating corresponding Upper and Lower
29 // pairs. It appears with a fixed Delta of
30 // {UpperLower, UpperLower, UpperLower}
31 // The constant UpperLower has an otherwise impossible delta value.
32 type CaseRange
struct {
38 // SpecialCase represents language-specific case mappings such as Turkish.
39 // Methods of SpecialCase customize (by overriding) the standard mappings.
// A SpecialCase is a slice of CaseRange entries; its ToUpper, ToTitle and
// ToLower methods hand those entries to the case-mapping lookup.
40 type SpecialCase
[]CaseRange
42 //BUG(r): Provide a mechanism for full case folding (mappings that involve
43 // multiple runes in the input or output).
45 // Indices into the Delta arrays inside CaseRanges for case mapping.
// d is an array of MaxCase int32 values.
// NOTE(review): presumably the type of the Delta field of CaseRange; confirm
// against the struct definition, which is not present in this listing.
53 type d
[MaxCase
]int32 // to make the CaseRanges text shorter
55 // If the Delta field of a CaseRange is UpperLower, it means
56 // this CaseRange represents a sequence of the form (say)
57 // Upper Lower Upper Lower.
59 UpperLower
= MaxRune
+ 1 // (Cannot be a valid delta.)
62 // Is reports whether rune is in the specified table of ranges.
// A rune that falls between a range's Lo and Hi (inclusive) is a member
// only when its offset from Lo is a multiple of that range's Stride.
63 func Is(ranges
[]Range
, rune
int) bool {
64 // common case: rune is ASCII or Latin-1
// (walk the ranges in order instead of searching)
66 for _
, r
:= range ranges
{
// membership within a range is decided by the stride test
73 return (rune
-r
.Lo
)%r
.Stride
== 0
78 // binary search over ranges
84 if r
.Lo
<= rune
&& rune
<= r
.Hi
{
// rune lies inside r's bounds; it belongs only if it lands on the stride
85 return (rune
-r
.Lo
)%r
.Stride
== 0
96 // IsUpper reports whether the rune is an upper case letter.
// Runes below 0x80 are decided by a direct 'A'..'Z' comparison; the
// visible fall-through path consults the Upper table through Is.
97 func IsUpper(rune
int) bool {
98 if rune
< 0x80 { // quick ASCII check
99 return 'A' <= rune
&& rune
<= 'Z'
101 return Is(Upper
, rune
)
104 // IsLower reports whether the rune is a lower case letter.
// Runes below 0x80 are decided by a direct 'a'..'z' comparison; the
// visible fall-through path consults the Lower table through Is.
105 func IsLower(rune
int) bool {
106 if rune
< 0x80 { // quick ASCII check
107 return 'a' <= rune
&& rune
<= 'z'
109 return Is(Lower
, rune
)
112 // IsTitle reports whether the rune is a title case letter.
// The visible fall-through path consults the Title table through Is.
// NOTE(review): the body of the ASCII branch is not present in this
// listing; confirm what it returns for runes below 0x80.
113 func IsTitle(rune
int) bool {
114 if rune
< 0x80 { // quick ASCII check
117 return Is(Title
, rune
)
120 // IsLetter reports whether the rune is a letter.
// The visible fall-through path consults the Letter table through Is.
121 func IsLetter(rune
int) bool {
122 if rune
< 0x80 { // quick ASCII check
// NOTE(review): original line 123 is absent from this listing. As shown,
// only 'A'..'Z' would satisfy the ASCII comparison below; confirm that
// lower case letters are folded to upper case before it runs.
124 return 'A' <= rune
&& rune
<= 'Z'
126 return Is(Letter
, rune
)
129 // IsSpace reports whether the rune is a white space character.
// Runes up to 0xFF are matched against an explicit list: tab, newline,
// vertical tab, form feed, carriage return, space, U+0085 and U+00A0.
// The visible fall-through path consults the White_Space table through Is.
130 func IsSpace(rune
int) bool {
131 if rune
<= 0xFF { // quick Latin-1 check
// NOTE(review): the switch header and the result of this case are not
// present in this listing; presumably a match reports true.
133 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
138 return Is(White_Space
, rune
)
141 // to maps the rune using the specified case mapping.
// _case selects the entry of a matching range's Delta array. A _case that
// is negative or not below MaxCase yields ReplacementChar.
// NOTE(review): the search loop over caseRange and the handling of an
// ordinary delta are not present in this listing.
142 func to(_case
int, rune
int, caseRange
[]CaseRange
) int {
143 if _case
< 0 || MaxCase
<= _case
{
144 return ReplacementChar
// as reasonable an error as any
146 // binary search over ranges
152 if r
.Lo
<= rune
&& rune
<= r
.Hi
{
153 delta
:= int(r
.Delta
[_case
])
155 // In an Upper-Lower sequence, which always starts with
156 // an UpperCase letter, the real deltas always look like:
157 // {0, 1, 0} UpperCase (Lower is next)
158 // {-1, 0, -1} LowerCase (Upper, Title are previous)
159 // The characters at even offsets from the beginning of the
160 // sequence are upper case; the ones at odd offsets are lower.
161 // The correct mapping can be done by clearing or setting the low
162 // bit in the sequence offset.
163 // The constants UpperCase and TitleCase are even while LowerCase
164 // is odd so we take the low bit from _case.
// That is: round the offset from r.Lo down to even, then OR in the low
// bit of _case.
165 return r
.Lo
+ ((rune
-r
.Lo
)&^1 | _case
&1)
178 // To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase.
// It delegates to the unexported to, using the CaseRanges table.
179 func To(_case
int, rune
int) int {
180 return to(_case
, rune
, CaseRanges
)
183 // ToUpper maps the rune to upper case.
// Runes below 0x80 take an inline path keyed on 'a'..'z'; the visible
// fall-through path defers to To with UpperCase.
// NOTE(review): the statements inside the ASCII branch are not present in
// this listing.
184 func ToUpper(rune
int) int {
185 if rune
< 0x80 { // quick ASCII check
186 if 'a' <= rune
&& rune
<= 'z' {
191 return To(UpperCase
, rune
)
194 // ToLower maps the rune to lower case.
// Runes below 0x80 take an inline path keyed on 'A'..'Z'; the visible
// fall-through path defers to To with LowerCase.
// NOTE(review): the statements inside the ASCII branch are not present in
// this listing.
195 func ToLower(rune
int) int {
196 if rune
< 0x80 { // quick ASCII check
197 if 'A' <= rune
&& rune
<= 'Z' {
202 return To(LowerCase
, rune
)
205 // ToTitle maps the rune to title case.
// Runes below 0x80 take an inline path keyed on 'a'..'z'; the visible
// fall-through path defers to To with TitleCase.
// NOTE(review): the statements inside the ASCII branch are not present in
// this listing.
206 func ToTitle(rune
int) int {
207 if rune
< 0x80 { // quick ASCII check
208 if 'a' <= rune
&& rune
<= 'z' { // title case is upper case for ASCII
213 return To(TitleCase
, rune
)
216 // ToUpper maps the rune to upper case giving priority to the special mapping.
// The receiver's own ranges are tried first, through to with UpperCase.
// NOTE(review): the handling of that result is not present in this listing.
217 func (special SpecialCase
) ToUpper(rune
int) int {
218 r
:= to(UpperCase
, rune
, []CaseRange(special
))
225 // ToTitle maps the rune to title case giving priority to the special mapping.
// The receiver's own ranges are tried first, through to with TitleCase.
// NOTE(review): the handling of that result is not present in this listing.
226 func (special SpecialCase
) ToTitle(rune
int) int {
227 r
:= to(TitleCase
, rune
, []CaseRange(special
))
234 // ToLower maps the rune to lower case giving priority to the special mapping.
235 func (special SpecialCase
) ToLower(rune
int) int {
236 r
:= to(LowerCase
, rune
, []CaseRange(special
))