Merge from mainline (167278:168000).
[official-gcc/graphite-test-results.git] / libgo / go / unicode / letter.go
blob9380624fd9e556f989d82d6c265626206b5c8e20
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package unicode provides data and functions to test some properties of Unicode code points.
6 package unicode
// Limits and sentinel values for Unicode code points.
const (
	MaxRune         = 0x10FFFF // Maximum valid Unicode code point.
	ReplacementChar = 0xFFFD   // Represents invalid code points.
)
// Range represents a range of Unicode code points. The range runs from Lo
// to Hi inclusive and has the specified stride: its members are Lo,
// Lo+Stride, Lo+2*Stride, and so on, up to Hi.
type Range struct {
	Lo     int
	Hi     int
	Stride int
}
22 // CaseRange represents a range of Unicode code points for simple (one
23 // code point to one code point) case conversion.
24 // The range runs from Lo to Hi inclusive, with a fixed stride of 1. Deltas
25 // are the number to add to the code point to reach the code point for a
26 // different case for that character. They may be negative. If zero, it
27 // means the character is in the corresponding case. There is a special
28 // case representing sequences of alternating corresponding Upper and Lower
29 // pairs. It appears with a fixed Delta of
30 // {UpperLower, UpperLower, UpperLower}
31 // The constant UpperLower has an otherwise impossible delta value.
32 type CaseRange struct {
33 Lo int
34 Hi int
35 Delta d
38 // SpecialCase represents language-specific case mappings such as Turkish.
39 // Methods of SpecialCase customize (by overriding) the standard mappings.
40 type SpecialCase []CaseRange
// BUG(r): Provide a mechanism for full case folding (mappings that involve
// multiple runes in the input or output).
// Indices into the Delta arrays inside CaseRanges for case mapping.
const (
	UpperCase = iota
	LowerCase
	TitleCase
	MaxCase // one past the last valid case index
)
53 type d [MaxCase]int32 // to make the CaseRanges text shorter
55 // If the Delta field of a CaseRange is UpperLower or LowerUpper, it means
56 // this CaseRange represents a sequence of the form (say)
57 // Upper Lower Upper Lower.
58 const (
59 UpperLower = MaxRune + 1 // (Cannot be a valid delta.)
62 // Is tests whether rune is in the specified table of ranges.
63 func Is(ranges []Range, rune int) bool {
64 // common case: rune is ASCII or Latin-1
65 if rune < 0x100 {
66 for _, r := range ranges {
67 if rune > r.Hi {
68 continue
70 if rune < r.Lo {
71 return false
73 return (rune-r.Lo)%r.Stride == 0
75 return false
78 // binary search over ranges
79 lo := 0
80 hi := len(ranges)
81 for lo < hi {
82 m := lo + (hi-lo)/2
83 r := ranges[m]
84 if r.Lo <= rune && rune <= r.Hi {
85 return (rune-r.Lo)%r.Stride == 0
87 if rune < r.Lo {
88 hi = m
89 } else {
90 lo = m + 1
93 return false
96 // IsUpper reports whether the rune is an upper case letter.
97 func IsUpper(rune int) bool {
98 if rune < 0x80 { // quick ASCII check
99 return 'A' <= rune && rune <= 'Z'
101 return Is(Upper, rune)
104 // IsLower reports whether the rune is a lower case letter.
105 func IsLower(rune int) bool {
106 if rune < 0x80 { // quick ASCII check
107 return 'a' <= rune && rune <= 'z'
109 return Is(Lower, rune)
112 // IsTitle reports whether the rune is a title case letter.
113 func IsTitle(rune int) bool {
114 if rune < 0x80 { // quick ASCII check
115 return false
117 return Is(Title, rune)
120 // IsLetter reports whether the rune is a letter.
121 func IsLetter(rune int) bool {
122 if rune < 0x80 { // quick ASCII check
123 rune &^= 'a' - 'A'
124 return 'A' <= rune && rune <= 'Z'
126 return Is(Letter, rune)
129 // IsSpace reports whether the rune is a white space character.
130 func IsSpace(rune int) bool {
131 if rune <= 0xFF { // quick Latin-1 check
132 switch rune {
133 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
134 return true
136 return false
138 return Is(White_Space, rune)
141 // to maps the rune using the specified case mapping.
142 func to(_case int, rune int, caseRange []CaseRange) int {
143 if _case < 0 || MaxCase <= _case {
144 return ReplacementChar // as reasonable an error as any
146 // binary search over ranges
147 lo := 0
148 hi := len(caseRange)
149 for lo < hi {
150 m := lo + (hi-lo)/2
151 r := caseRange[m]
152 if r.Lo <= rune && rune <= r.Hi {
153 delta := int(r.Delta[_case])
154 if delta > MaxRune {
155 // In an Upper-Lower sequence, which always starts with
156 // an UpperCase letter, the real deltas always look like:
157 // {0, 1, 0} UpperCase (Lower is next)
158 // {-1, 0, -1} LowerCase (Upper, Title are previous)
159 // The characters at even offsets from the beginning of the
160 // sequence are upper case; the ones at odd offsets are lower.
161 // The correct mapping can be done by clearing or setting the low
162 // bit in the sequence offset.
163 // The constants UpperCase and TitleCase are even while LowerCase
164 // is odd so we take the low bit from _case.
165 return r.Lo + ((rune-r.Lo)&^1 | _case&1)
167 return rune + delta
169 if rune < r.Lo {
170 hi = m
171 } else {
172 lo = m + 1
175 return rune
178 // To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase.
179 func To(_case int, rune int) int {
180 return to(_case, rune, CaseRanges)
183 // ToUpper maps the rune to upper case.
184 func ToUpper(rune int) int {
185 if rune < 0x80 { // quick ASCII check
186 if 'a' <= rune && rune <= 'z' {
187 rune -= 'a' - 'A'
189 return rune
191 return To(UpperCase, rune)
194 // ToLower maps the rune to lower case.
195 func ToLower(rune int) int {
196 if rune < 0x80 { // quick ASCII check
197 if 'A' <= rune && rune <= 'Z' {
198 rune += 'a' - 'A'
200 return rune
202 return To(LowerCase, rune)
205 // ToTitle maps the rune to title case.
206 func ToTitle(rune int) int {
207 if rune < 0x80 { // quick ASCII check
208 if 'a' <= rune && rune <= 'z' { // title case is upper case for ASCII
209 rune -= 'a' - 'A'
211 return rune
213 return To(TitleCase, rune)
216 // ToUpper maps the rune to upper case giving priority to the special mapping.
217 func (special SpecialCase) ToUpper(rune int) int {
218 r := to(UpperCase, rune, []CaseRange(special))
219 if r == rune {
220 r = ToUpper(rune)
222 return r
225 // ToTitle maps the rune to title case giving priority to the special mapping.
226 func (special SpecialCase) ToTitle(rune int) int {
227 r := to(TitleCase, rune, []CaseRange(special))
228 if r == rune {
229 r = ToTitle(rune)
231 return r
234 // ToLower maps the rune to lower case giving priority to the special mapping.
235 func (special SpecialCase) ToLower(rune int) int {
236 r := to(LowerCase, rune, []CaseRange(special))
237 if r == rune {
238 r = ToLower(rune)
240 return r