libgo: update to go1.9
[official-gcc.git] / libgo / go / golang_org / x / text / secure / bidirule / bidirule.go
blob9f9594e617998e2e0eef327999c9b8b7cdef193c
1 // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
3 // Copyright 2016 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
7 // Package bidirule implements the Bidi Rule defined by RFC 5893.
8 //
9 // This package is under development. The API may change without notice and
10 // without preserving backward compatibility.
11 package bidirule
13 import (
14 "errors"
15 "unicode/utf8"
17 "golang_org/x/text/transform"
18 "golang_org/x/text/unicode/bidi"
21 // This file contains an implementation of RFC 5893: Right-to-Left Scripts for
22 // Internationalized Domain Names for Applications (IDNA)
24 // A label is an individual component of a domain name. Labels are usually
25 // shown separated by dots; for example, the domain name "www.example.com" is
26 // composed of three labels: "www", "example", and "com".
28 // An RTL label is a label that contains at least one character of class R, AL,
29 // or AN. An LTR label is any label that is not an RTL label.
31 // A "Bidi domain name" is a domain name that contains at least one RTL label.
33 // The following guarantees can be made based on the above:
35 // o In a domain name consisting of only labels that satisfy the rule,
36 // the requirements of Section 3 are satisfied. Note that even LTR
37 // labels and pure ASCII labels have to be tested.
39 // o In a domain name consisting of only LDH labels (as defined in the
40 // Definitions document [RFC5890]) and labels that satisfy the rule,
41 // the requirements of Section 3 are satisfied as long as a label
42 // that starts with an ASCII digit does not come after a
43 // right-to-left label.
45 // No guarantee is given for other combinations.
// ErrInvalid is the sentinel error reported when a label violates the Bidi
// Rule of RFC 5893.
var ErrInvalid = errors.New("bidirule: failed Bidi Rule")
// ruleState identifies a state of the Bidi Rule state machine. It is used as
// the index into the transitions table.
type ruleState uint8

const (
	// ruleInitial is the zero value: no character has been processed yet.
	ruleInitial ruleState = iota
	// ruleLTR is an LTR label whose last character may not end the label.
	ruleLTR
	// ruleLTRFinal is an LTR label that could validly end at this point.
	ruleLTRFinal
	// ruleRTL is an RTL label whose last character may not end the label.
	ruleRTL
	// ruleRTLFinal is an RTL label that could validly end at this point.
	ruleRTLFinal
	// ruleInvalid marks a label in which a disallowed character was seen.
	ruleInvalid
)
61 type ruleTransition struct {
62 next ruleState
63 mask uint16
66 var transitions = [...][2]ruleTransition{
67 // [2.1] The first character must be a character with Bidi property L, R, or
68 // AL. If it has the R or AL property, it is an RTL label; if it has the L
69 // property, it is an LTR label.
70 ruleInitial: {
71 {ruleLTRFinal, 1 << bidi.L},
72 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
74 ruleRTL: {
75 // [2.3] In an RTL label, the end of the label must be a character with
76 // Bidi property R, AL, EN, or AN, followed by zero or more characters
77 // with Bidi property NSM.
78 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
80 // [2.2] In an RTL label, only characters with the Bidi properties R,
81 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
82 // We exclude the entries from [2.3]
83 {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
85 ruleRTLFinal: {
86 // [2.3] In an RTL label, the end of the label must be a character with
87 // Bidi property R, AL, EN, or AN, followed by zero or more characters
88 // with Bidi property NSM.
89 {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
91 // [2.2] In an RTL label, only characters with the Bidi properties R,
92 // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
93 // We exclude the entries from [2.3] and NSM.
94 {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
96 ruleLTR: {
97 // [2.6] In an LTR label, the end of the label must be a character with
98 // Bidi property L or EN, followed by zero or more characters with Bidi
99 // property NSM.
100 {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},
102 // [2.5] In an LTR label, only characters with the Bidi properties L,
103 // EN, ES, CS, ET, ON, BN, or NSM are allowed.
104 // We exclude the entries from [2.6].
105 {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
107 ruleLTRFinal: {
108 // [2.6] In an LTR label, the end of the label must be a character with
109 // Bidi property L or EN, followed by zero or more characters with Bidi
110 // property NSM.
111 {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
113 // [2.5] In an LTR label, only characters with the Bidi properties L,
114 // EN, ES, CS, ET, ON, BN, or NSM are allowed.
115 // We exclude the entries from [2.6].
116 {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
118 ruleInvalid: {
119 {ruleInvalid, 0},
120 {ruleInvalid, 0},
124 // [2.4] In an RTL label, if an EN is present, no AN may be present, and
125 // vice versa.
126 const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
128 // From RFC 5893
129 // An RTL label is a label that contains at least one character of type
130 // R, AL, or AN.
132 // An LTR label is any label that is not an RTL label.
134 // Direction reports the direction of the given label as defined by RFC 5893.
135 // The Bidi Rule does not have to be applied to labels of the category
136 // LeftToRight.
137 func Direction(b []byte) bidi.Direction {
// Walk b one lookup at a time; i is the byte offset of the next lookup and is
// advanced by hand because the for statement has no post clause.
138 for i := 0; i < len(b); {
139 e, sz := bidi.Lookup(b[i:])
// NOTE(review): the statement guarded by this condition is not visible in
// this listing. Since i += sz below would not make progress when sz is 0,
// this branch presumably steps i forward itself — confirm against upstream.
140 if sz == 0 {
143 c := e.Class()
// Per RFC 5893, a single character of class R, AL, or AN makes the whole
// label an RTL label, so the scan can stop at the first one found.
144 if c == bidi.R || c == bidi.AL || c == bidi.AN {
145 return bidi.RightToLeft
147 i += sz
// No R, AL, or AN character was found anywhere in b.
149 return bidi.LeftToRight
152 // DirectionString reports the direction of the given label as defined by RFC
153 // 5893. The Bidi Rule does not have to be applied to labels of the category
154 // LeftToRight.
155 func DirectionString(s string) bidi.Direction {
// Walk s one lookup at a time; i is the byte offset of the next lookup and is
// advanced by hand because the for statement has no post clause.
156 for i := 0; i < len(s); {
157 e, sz := bidi.LookupString(s[i:])
// NOTE(review): the statement guarded by this condition is not visible in
// this listing. Since i += sz below would not make progress when sz is 0,
// this branch presumably steps i forward itself — confirm against upstream.
158 if sz == 0 {
161 c := e.Class()
// Per RFC 5893, a single character of class R, AL, or AN makes the whole
// label an RTL label, so the scan can stop at the first one found.
162 if c == bidi.R || c == bidi.AL || c == bidi.AN {
163 return bidi.RightToLeft
165 i += sz
// No R, AL, or AN character was found anywhere in s.
167 return bidi.LeftToRight
170 // Valid reports whether b conforms to the BiDi rule.
171 func Valid(b []byte) bool {
172 var t Transformer
173 if n, ok := t.advance(b); !ok || n < len(b) {
174 return false
176 return t.isFinal()
179 // ValidString reports whether s conforms to the BiDi rule.
180 func ValidString(s string) bool {
181 var t Transformer
182 if n, ok := t.advanceString(s); !ok || n < len(s) {
183 return false
185 return t.isFinal()
188 // New returns a Transformer that verifies that input adheres to the Bidi Rule.
189 func New() *Transformer {
190 return &Transformer{}
193 // Transformer implements transform.Transform.
194 type Transformer struct {
195 state ruleState
196 hasRTL bool
197 seen uint16
200 // A rule can only be violated for "Bidi Domain names", meaning if one of the
201 // following categories has been observed.
202 func (t *Transformer) isRTL() bool {
203 const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
204 return t.seen&isRTL != 0
207 func (t *Transformer) isFinal() bool {
208 if !t.isRTL() {
209 return true
211 return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
214 // Reset implements transform.Transformer.
215 func (t *Transformer) Reset() { *t = Transformer{} }
217 // Transform implements transform.Transformer. This Transformer has state and
218 // needs to be reset between uses.
219 func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
220 if len(dst) < len(src) {
221 src = src[:len(dst)]
222 atEOF = false
223 err = transform.ErrShortDst
225 n, err1 := t.Span(src, atEOF)
226 copy(dst, src[:n])
227 if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
228 err = err1
230 return n, n, err
233 // Span returns the first n bytes of src that conform to the Bidi rule.
234 func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
235 if t.state == ruleInvalid && t.isRTL() {
236 return 0, ErrInvalid
238 n, ok := t.advance(src)
239 switch {
240 case !ok:
241 err = ErrInvalid
242 case n < len(src):
243 if !atEOF {
244 err = transform.ErrShortSrc
245 break
247 err = ErrInvalid
248 case !t.isFinal():
249 err = ErrInvalid
251 return n, err
254 // Precomputing the ASCII values decreases running time for the ASCII fast path
255 // by about 30%.
256 var asciiTable [128]bidi.Properties
258 func init() {
259 for i := range asciiTable {
260 p, _ := bidi.LookupRune(rune(i))
261 asciiTable[i] = p
265 func (t *Transformer) advance(s []byte) (n int, ok bool) {
266 var e bidi.Properties
267 var sz int
268 for n < len(s) {
269 if s[n] < utf8.RuneSelf {
270 e, sz = asciiTable[s[n]], 1
271 } else {
272 e, sz = bidi.Lookup(s[n:])
273 if sz <= 1 {
274 if sz == 1 {
275 // We always consider invalid UTF-8 to be invalid, even if
276 // the string has not yet been determined to be RTL.
277 // TODO: is this correct?
278 return n, false
280 return n, true // incomplete UTF-8 encoding
283 // TODO: using CompactClass would result in noticeable speedup.
284 // See unicode/bidi/prop.go:Properties.CompactClass.
285 c := uint16(1 << e.Class())
286 t.seen |= c
287 if t.seen&exclusiveRTL == exclusiveRTL {
288 t.state = ruleInvalid
289 return n, false
291 switch tr := transitions[t.state]; {
292 case tr[0].mask&c != 0:
293 t.state = tr[0].next
294 case tr[1].mask&c != 0:
295 t.state = tr[1].next
296 default:
297 t.state = ruleInvalid
298 if t.isRTL() {
299 return n, false
302 n += sz
304 return n, true
307 func (t *Transformer) advanceString(s string) (n int, ok bool) {
308 var e bidi.Properties
309 var sz int
310 for n < len(s) {
311 if s[n] < utf8.RuneSelf {
312 e, sz = asciiTable[s[n]], 1
313 } else {
314 e, sz = bidi.LookupString(s[n:])
315 if sz <= 1 {
316 if sz == 1 {
317 return n, false // invalid UTF-8
319 return n, true // incomplete UTF-8 encoding
322 // TODO: using CompactClass results in noticeable speedup.
323 // See unicode/bidi/prop.go:Properties.CompactClass.
324 c := uint16(1 << e.Class())
325 t.seen |= c
326 if t.seen&exclusiveRTL == exclusiveRTL {
327 t.state = ruleInvalid
328 return n, false
330 switch tr := transitions[t.state]; {
331 case tr[0].mask&c != 0:
332 t.state = tr[0].next
333 case tr[1].mask&c != 0:
334 t.state = tr[1].next
335 default:
336 t.state = ruleInvalid
337 if t.isRTL() {
338 return n, false
341 n += sz
343 return n, true