1 // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
3 // Copyright 2016 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
11 // Properties provides access to BiDi properties of runes.
12 type Properties
struct {
17 var trie
= newBidiTrie(0)
// TODO: using this for bidirule reduces the running time by about 5%. Consider
// if this is worth exposing or if we can find a way to speed up the Class
// method.
//
// // CompactClass is like Class, but maps all of the BiDi control classes
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
// func (p Properties) CompactClass() Class {
// 	return Class(p.entry & 0x0F)
// }
29 // Class returns the Bidi class for p.
30 func (p Properties
) Class() Class
{
31 c
:= Class(p
.entry
& 0x0F)
33 c
= controlByteToClass
[p
.last
&0xF]
38 // IsBracket reports whether the rune is a bracket.
39 func (p Properties
) IsBracket() bool { return p
.entry
&0xF0 != 0 }
41 // IsOpeningBracket reports whether the rune is an opening bracket.
42 // IsBracket must return true.
43 func (p Properties
) IsOpeningBracket() bool { return p
.entry
&openMask
!= 0 }
45 // TODO: find a better API and expose.
46 func (p Properties
) reverseBracket(r rune
) rune
{
47 return xorMasks
[p
.entry
>>xorMaskShift
] ^ r
50 var controlByteToClass
= [16]Class
{
51 0xD: LRO
, // U+202D LeftToRightOverride,
52 0xE: RLO
, // U+202E RightToLeftOverride,
53 0xA: LRE
, // U+202A LeftToRightEmbedding,
54 0xB: RLE
, // U+202B RightToLeftEmbedding,
55 0xC: PDF
, // U+202C PopDirectionalFormat,
56 0x6: LRI
, // U+2066 LeftToRightIsolate,
57 0x7: RLI
, // U+2067 RightToLeftIsolate,
58 0x8: FSI
, // U+2068 FirstStrongIsolate,
59 0x9: PDI
, // U+2069 PopDirectionalIsolate,
62 // LookupRune returns properties for r.
63 func LookupRune(r rune
) (p Properties
, size
int) {
65 n
:= utf8
.EncodeRune(buf
[:], r
)
66 return Lookup(buf
[:n
])
69 // TODO: these lookup methods are based on the generated trie code. The returned
70 // sizes have slightly different semantics from the generated code, in that it
71 // always returns size==1 for an illegal UTF-8 byte (instead of the length
72 // of the maximum invalid subsequence). Most Transformers, like unicode/norm,
73 // leave invalid UTF-8 untouched, in which case it has performance benefits to
74 // do so (without changing the semantics). Bidi requires the semantics used here
75 // for the bidirule implementation to be compatible with the Go semantics.
76 // They ultimately should perhaps be adopted by all trie implementations, for
78 // This unrolled code also boosts performance of the secure/bidirule package by
80 // So, to remove this code:
81 // - add option to trie generator to define return type.
82 // - always return 1 byte size for ill-formed UTF-8 runes.
84 // Lookup returns properties for the first rune in s and the width in bytes of
85 // its encoding. The size will be 0 if s does not hold enough bytes to complete
// the encoding.
//
// For a well-formed rune sz is 1, 2, 3 or 4. p.entry is read from bidiValues
// for ASCII and from trie.lookupValue for multi-byte sequences. A zero
// Properties with size 1 is returned when a continuation byte lies outside
// 0x80..0xBF, and by the final return of the function.
//
// NOTE(review): the statements defining c0, i, c1, c2 and c3, the conditions
// guarding the size-0 returns, the consumers of o and one case label are not
// visible in this listing; confirm against the complete source before
// changing this function.
87 func Lookup(s
[]byte) (p Properties
, sz
int) {
// Lead byte below 0x80: the value comes straight from bidiValues.
90 case c0
< 0x80: // is ASCII
91 return Properties
{entry
: bidiValues
[c0
]}, 1
// NOTE(review): the case label selecting this size-1 return is not visible.
93 return Properties
{}, 1
94 case c0
< 0xE0: // 2-byte UTF-8
// Size 0: presumably taken when s is too short for this sequence (see the
// function comment); the guarding condition is not visible here.
96 return Properties
{}, 0
// A continuation byte must be in 0x80..0xBF; anything else yields size 1.
100 if c1
< 0x80 ||
0xC0 <= c1
{
101 return Properties
{}, 1
103 return Properties
{entry
: trie
.lookupValue(uint32(i
), c1
)}, 2
104 case c0
< 0xF0: // 3-byte UTF-8
106 return Properties
{}, 0
110 if c1
< 0x80 ||
0xC0 <= c1
{
111 return Properties
{}, 1
// o combines i, shifted left by six bits, with the continuation byte c1.
113 o
:= uint32(i
)<<6 + uint32(c1
)
116 if c2
< 0x80 ||
0xC0 <= c2
{
117 return Properties
{}, 1
// Only the 3-byte form records the final byte in last: Class indexes
// controlByteToClass with p.last&0xF, and the code points listed in that
// table (U+202A..U+202E, U+2066..U+2069) all encode as three bytes.
119 return Properties
{entry
: trie
.lookupValue(uint32(i
), c2
), last
: c2
}, 3
120 case c0
< 0xF8: // 4-byte UTF-8
122 return Properties
{}, 0
126 if c1
< 0x80 ||
0xC0 <= c1
{
127 return Properties
{}, 1
129 o
:= uint32(i
)<<6 + uint32(c1
)
132 if c2
< 0x80 ||
0xC0 <= c2
{
133 return Properties
{}, 1
// o is recomputed from the current i and the next continuation byte c2.
135 o
= uint32(i
)<<6 + uint32(c2
)
138 if c3
< 0x80 ||
0xC0 <= c3
{
139 return Properties
{}, 1
141 return Properties
{entry
: trie
.lookupValue(uint32(i
), c3
)}, 4
// Final return: zero Properties with size 1 (per the TODO above this
// function, an illegal UTF-8 byte always reports size 1).
144 return Properties
{}, 1
147 // LookupString returns properties for the first rune in s and the width in
148 // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
149 // complete the encoding.
//
// The visible statements match Lookup's; only the parameter type differs
// (string instead of []byte). For a well-formed rune sz is 1, 2, 3 or 4.
// p.entry is read from bidiValues for ASCII and from trie.lookupValue for
// multi-byte sequences. A zero Properties with size 1 is returned when a
// continuation byte lies outside 0x80..0xBF, and by the final return.
//
// NOTE(review): the statements defining c0, i, c1, c2 and c3, the conditions
// guarding the size-0 returns, the consumers of o and one case label are not
// visible in this listing; confirm against the complete source before
// changing this function.
150 func LookupString(s
string) (p Properties
, sz
int) {
// Lead byte below 0x80: the value comes straight from bidiValues.
153 case c0
< 0x80: // is ASCII
154 return Properties
{entry
: bidiValues
[c0
]}, 1
// NOTE(review): the case label selecting this size-1 return is not visible.
156 return Properties
{}, 1
157 case c0
< 0xE0: // 2-byte UTF-8
// Size 0: presumably taken when s is too short for this sequence (see the
// function comment); the guarding condition is not visible here.
159 return Properties
{}, 0
// A continuation byte must be in 0x80..0xBF; anything else yields size 1.
163 if c1
< 0x80 ||
0xC0 <= c1
{
164 return Properties
{}, 1
166 return Properties
{entry
: trie
.lookupValue(uint32(i
), c1
)}, 2
167 case c0
< 0xF0: // 3-byte UTF-8
169 return Properties
{}, 0
173 if c1
< 0x80 ||
0xC0 <= c1
{
174 return Properties
{}, 1
// o combines i, shifted left by six bits, with the continuation byte c1.
176 o
:= uint32(i
)<<6 + uint32(c1
)
179 if c2
< 0x80 ||
0xC0 <= c2
{
180 return Properties
{}, 1
// Only the 3-byte form records the final byte in last: Class indexes
// controlByteToClass with p.last&0xF, and the code points listed in that
// table (U+202A..U+202E, U+2066..U+2069) all encode as three bytes.
182 return Properties
{entry
: trie
.lookupValue(uint32(i
), c2
), last
: c2
}, 3
183 case c0
< 0xF8: // 4-byte UTF-8
185 return Properties
{}, 0
189 if c1
< 0x80 ||
0xC0 <= c1
{
190 return Properties
{}, 1
192 o
:= uint32(i
)<<6 + uint32(c1
)
195 if c2
< 0x80 ||
0xC0 <= c2
{
196 return Properties
{}, 1
// o is recomputed from the current i and the next continuation byte c2.
198 o
= uint32(i
)<<6 + uint32(c2
)
201 if c3
< 0x80 ||
0xC0 <= c3
{
202 return Properties
{}, 1
204 return Properties
{entry
: trie
.lookupValue(uint32(i
), c3
)}, 4
// Final return: zero Properties with size 1 (an illegal UTF-8 byte always
// reports size 1, per the TODO preceding Lookup).
207 return Properties
{}, 1