1 // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
3 // Copyright 2011 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
9 // This file contains Form-specific logic and wrappers for data in tables.go.
11 // Rune info is stored in a separate trie per composing form. A composing form
12 // and its corresponding decomposing form share the same trie. Each trie maps
13 // a rune to a uint16. The values take two forms. For v >= 0x8000:
15 // 15: 1 (inverse of NFD_QC bit of qcInfo)
16 // 13..7: qcInfo (see below). isYesD is always true (no decomposition).
17 // 6..0: ccc (compressed CCC value).
18 // For v < 0x8000, the respective rune has a decomposition and v is an index
19 // into a byte array of UTF-8 decomposition sequences and additional info, which has the form:
21 // <header> <decomp_byte>* [<tccc> [<lccc>]]
22 // The header contains the number of bytes in the decomposition (excluding this
23 // length byte). The two most significant bits of this length byte correspond
24 // to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
25 // The byte sequence is followed by a trailing and leading CCC if the values
26 // for these are not zero. The value of v determines which ccc are appended
27 // to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
28 // the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
29 // there is an additional leading ccc. The value of tccc itself is the
30 // trailing CCC shifted left 2 bits. The two least-significant bits of tccc
31 // are the number of trailing non-starters.
34 qcInfoMask
= 0x3F // to clear all but the relevant bits in a qcInfo
35 headerLenMask
= 0x3F // extract the length value from the header byte
36 headerFlagsMask
= 0xC0 // extract the qcInfo bits from the header byte
39 // Properties provides access to normalization properties of a rune.
40 type Properties
struct {
41 pos
uint8 // start position in reorderBuffer; used in composition.go
42 size
uint8 // length of UTF-8 encoding of this rune
43 ccc
uint8 // leading canonical combining class (ccc if not decomposition)
44 tccc
uint8 // trailing canonical combining class (ccc if not decomposition)
45 nLead
uint8 // number of leading non-starters.
46 flags qcInfo
// quick check flags
// lookupFunc is the signature of the functions that are dispatchable per form.
// It takes an input b and an int i and returns a Properties value;
// lookupInfoNFC and lookupInfoNFKC in this file both have this shape.
// NOTE(review): i is presumably a byte position within b — confirm against
// the charinfo methods of the input type.
type lookupFunc func(b input, i int) Properties
53 // formInfo holds Form-specific functions and tables.
54 type formInfo
struct {
56 composing
, compatibility
bool // form type
61 var formTable
= []*formInfo
{{
66 nextMain
: nextComposed
,
72 nextMain
: nextDecomposed
,
78 nextMain
: nextComposed
,
84 nextMain
: nextDecomposed
,
87 // We do not distinguish between boundaries for NFC, NFD, etc. to avoid
88 // unexpected behavior for the user. For example, in NFD, there is a boundary
89 // after 'a'. However, 'a' might combine with modifiers, so from the application's
90 // perspective it is not a good boundary. We will therefore always use the
91 // boundaries for the combining variants.
93 // BoundaryBefore returns true if this rune starts a new segment and
94 // cannot combine with any rune on the left.
95 func (p Properties
) BoundaryBefore() bool {
96 if p
.ccc
== 0 && !p
.combinesBackward() {
99 // We assume that the CCC of the first character in a decomposition
100 // is always non-zero if different from info.ccc and that we can return
101 // false at this point. This is verified by maketables.
105 // BoundaryAfter returns true if runes cannot combine with or otherwise
106 // interact with this or previous runes.
107 func (p Properties
) BoundaryAfter() bool {
108 // TODO: loosen these conditions.
112 // We pack quick check data in 6 bits:
113 // 5: Combines forward (0 == false, 1 == true)
114 // 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
115 // 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
116 // 1..0: Number of trailing non-starters.
118 // When all 6 bits are zero and the rune's ccc is 0, the character is inert,
119 // meaning it is never influenced by normalization.
122 func (p Properties
) isYesC() bool { return p
.flags
&0x10 == 0 }
123 func (p Properties
) isYesD() bool { return p
.flags
&0x4 == 0 }
125 func (p Properties
) combinesForward() bool { return p
.flags
&0x20 != 0 }
126 func (p Properties
) combinesBackward() bool { return p
.flags
&0x8 != 0 } // == isMaybe
127 func (p Properties
) hasDecomposition() bool { return p
.flags
&0x4 != 0 } // == isNoD
129 func (p Properties
) isInert() bool {
130 return p
.flags
&qcInfoMask
== 0 && p
.ccc
== 0
133 func (p Properties
) multiSegment() bool {
134 return p
.index
>= firstMulti
&& p
.index
< endMulti
137 func (p Properties
) nLeadingNonStarters() uint8 {
141 func (p Properties
) nTrailingNonStarters() uint8 {
142 return uint8(p
.flags
& 0x03)
145 // Decomposition returns the decomposition for the underlying rune
146 // or nil if there is none.
147 func (p Properties
) Decomposition() []byte {
148 // TODO: create the decomposition for Hangul?
153 n
:= decomps
[i
] & headerLenMask
155 return decomps
[i
: i
+uint16(n
)]
158 // Size returns the length of UTF-8 encoding of the rune.
159 func (p Properties
) Size() int {
163 // CCC returns the canonical combining class of the underlying rune.
164 func (p Properties
) CCC() uint8 {
165 if p
.index
>= firstCCCZeroExcept
{
171 // LeadCCC returns the CCC of the first rune in the decomposition.
172 // If there is no decomposition, LeadCCC equals CCC.
173 func (p Properties
) LeadCCC() uint8 {
177 // TrailCCC returns the CCC of the last rune in the decomposition.
178 // If there is no decomposition, TrailCCC equals CCC.
179 func (p Properties
) TrailCCC() uint8 {
184 // We use 32-bit keys instead of 64-bit for the two codepoint keys.
185 // This clips off the bits of three entries, but we know this will not
186 // result in a collision. In the unlikely event that changes to
187 // UnicodeData.txt introduce collisions, the compiler will catch it.
188 // Note that the recomposition map for NFC and NFKC are identical.
190 // combine returns the combined rune or 0 if it doesn't exist.
191 func combine(a
, b rune
) rune
{
192 key
:= uint32(uint16(a
))<<16 + uint32(uint16(b
))
193 return recompMap
[key
]
196 func lookupInfoNFC(b input
, i
int) Properties
{
197 v
, sz
:= b
.charinfoNFC(i
)
198 return compInfo(v
, sz
)
201 func lookupInfoNFKC(b input
, i
int) Properties
{
202 v
, sz
:= b
.charinfoNFKC(i
)
203 return compInfo(v
, sz
)
206 // Properties returns properties for the first rune in s.
207 func (f Form
) Properties(s
[]byte) Properties
{
208 if f
== NFC || f
== NFD
{
209 return compInfo(nfcData
.lookup(s
))
211 return compInfo(nfkcData
.lookup(s
))
214 // PropertiesString returns properties for the first rune in s.
215 func (f Form
) PropertiesString(s
string) Properties
{
216 if f
== NFC || f
== NFD
{
217 return compInfo(nfcData
.lookupString(s
))
219 return compInfo(nfkcData
.lookupString(s
))
222 // compInfo converts the information contained in v and sz
223 // to a Properties. See the comment at the top of the file
224 // for more information on the format.
225 func compInfo(v
uint16, sz
int) Properties
{
227 return Properties
{size
: uint8(sz
)}
228 } else if v
>= 0x8000 {
233 flags
: qcInfo(v
>> 8),
235 if p
.ccc
> 0 || p
.combinesBackward() {
236 p
.nLead
= uint8(p
.flags
& 0x3)
242 f
:= (qcInfo(h
&headerFlagsMask
) >> 2) |
0x4
243 p
:= Properties
{size
: uint8(sz
), flags
: f
, index
: v
}
245 v
+= uint16(h
&headerLenMask
) + 1
248 p
.flags |
= qcInfo(c
& 0x3)
249 if v
>= firstLeadingCCC
{
251 if v
>= firstStarterWithNLead
{
252 // We were tricked. Remove the decomposition.