|
|
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in http://www.unicode.org/reports/tr46.
// See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/net/idna"
import ( "fmt" "strings" "unicode/utf8"
"golang.org/x/text/secure/bidirule" "golang.org/x/text/unicode/bidi" "golang.org/x/text/unicode/norm" )
// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
// specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
// string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
// needed, and document that the return string may not be empty in case of
// error in the future.
// I think Option 1 is best, but it is quite opinionated.
// ToASCII is a wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) { return Punycode.process(s, true) }
// ToUnicode is a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) { return Punycode.process(s, false) }
// An Option configures a Profile at creation time.
type Option func(*options)
// Transitional sets a Profile to use the Transitional mapping as defined in UTS
// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
// transitional mapping provides a compromise between IDNA2003 and IDNA2008
// compatibility. It is used by most browsers when resolving domain names. This
// option is only meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option { return func(o *options) { o.transitional = true } }
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
func VerifyDNSLength(verify bool) Option { return func(o *options) { o.verifyDNSLength = verify } }
// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
//
// This is the behavior suggested by the UTS #46 and is adopted by some
// browsers.
func RemoveLeadingDots(remove bool) Option { return func(o *options) { o.removeLeadingDots = remove } }
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
func ValidateLabels(enable bool) Option { return func(o *options) { // Don't override existing mappings, but set one that at least checks
// normalization if it is not set.
if o.mapping == nil && enable { o.mapping = normalize } o.trie = trie o.validateLabels = enable o.fromPuny = validateFromPunycode } }
// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details This option
// corresponds to the UseSTD3ASCIIRules option in UTS #46.
func StrictDomainName(use bool) Option { return func(o *options) { o.trie = trie o.useSTD3Rules = use o.fromPuny = validateFromPunycode } }
// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
func BidiRule() Option { return func(o *options) { o.bidirule = bidirule.ValidString } }
// ValidateForRegistration sets validation options to verify that a given IDN is
// properly formatted for registration as defined by Section 4 of RFC 5891.
func ValidateForRegistration() Option { return func(o *options) { o.mapping = validateRegistration StrictDomainName(true)(o) ValidateLabels(true)(o) VerifyDNSLength(true)(o) BidiRule()(o) } }
// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out in
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
// to add this check.
//
// The mappings include normalization and mapping case, width and other
// compatibility mappings.
func MapForLookup() Option { return func(o *options) { o.mapping = validateAndMap StrictDomainName(true)(o) ValidateLabels(true)(o) } }
type options struct { transitional bool useSTD3Rules bool validateLabels bool verifyDNSLength bool removeLeadingDots bool
trie *idnaTrie
// fromPuny calls validation rules when converting A-labels to U-labels.
fromPuny func(p *Profile, s string) error
// mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893.
bidirule func(s string) bool }
// A Profile defines the configuration of an IDNA mapper.
type Profile struct { options }
func apply(o *options, opts []Option) { for _, f := range opts { f(o) } }
// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals the
// Punycode Profile. Options can be passed to further restrict the Profile. The
// MapForLookup and ValidateForRegistration options set a collection of options,
// for lookup and registration purposes respectively, which can be tailored by
// adding more fine-grained options, where later options override earlier
// options.
func New(o ...Option) *Profile { p := &Profile{} apply(&p.options, o) return p }
// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) { return p.process(s, true) }
// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) { pp := *p pp.transitional = false return pp.process(s, false) }
// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
func (p *Profile) String() string { s := "" if p.transitional { s = "Transitional" } else { s = "NonTransitional" } if p.useSTD3Rules { s += ":UseSTD3Rules" } if p.validateLabels { s += ":ValidateLabels" } if p.verifyDNSLength { s += ":VerifyDNSLength" } return s }
var ( // Punycode is a Profile that does raw punycode processing with a minimum
// of validation.
Punycode *Profile = punycode
// Lookup is the recommended profile for looking up domain names, according
// to Section 5 of RFC 5891. The exact configuration of this profile may
// change over time.
Lookup *Profile = lookup
// Display is the recommended profile for displaying domain names.
// The configuration of this profile may change over time.
Display *Profile = display
// Registration is the recommended profile for checking whether a given
// IDN is valid for registration, according to Section 4 of RFC 5891.
Registration *Profile = registration
punycode = &Profile{} lookup = &Profile{options{ transitional: true, useSTD3Rules: true, validateLabels: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateAndMap, bidirule: bidirule.ValidString, }} display = &Profile{options{ useSTD3Rules: true, validateLabels: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateAndMap, bidirule: bidirule.ValidString, }} registration = &Profile{options{ useSTD3Rules: true, validateLabels: true, verifyDNSLength: true, trie: trie, fromPuny: validateFromPunycode, mapping: validateRegistration, bidirule: bidirule.ValidString, }}
// TODO: profiles
// Register: recommended for approving domain names: don't do any mappings
// but rather reject on invalid input. Bundle or block deviation characters.
)
type labelError struct{ label, code_ string }
func (e labelError) code() string { return e.code_ } func (e labelError) Error() string { return fmt.Sprintf("idna: invalid label %q", e.label) }
type runeError rune
func (e runeError) code() string { return "P1" } func (e runeError) Error() string { return fmt.Sprintf("idna: disallowed rune %U", e) }
// process implements the algorithm described in section 4 of UTS #46,
// see http://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) { var err error var isBidi bool if p.mapping != nil { s, isBidi, err = p.mapping(p, s) } // Remove leading empty labels.
if p.removeLeadingDots { for ; len(s) > 0 && s[0] == '.'; s = s[1:] { } } // TODO: allow for a quick check of the tables data.
// It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this.
if err == nil && p.verifyDNSLength && s == "" { err = &labelError{s, "A4"} } labels := labelIter{orig: s} for ; !labels.done(); labels.next() { label := labels.label() if label == "" { // Empty labels are not okay. The label iterator skips the last
// label if it is empty.
if err == nil && p.verifyDNSLength { err = &labelError{s, "A4"} } continue } if strings.HasPrefix(label, acePrefix) { u, err2 := decode(label[len(acePrefix):]) if err2 != nil { if err == nil { err = err2 } // Spec says keep the old label.
continue } isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight labels.set(u) if err == nil && p.validateLabels { err = p.fromPuny(p, u) } if err == nil { // This should be called on NonTransitional, according to the
// spec, but that currently does not have any effect. Use the
// original profile to preserve options.
err = p.validateLabel(u) } } else if err == nil { err = p.validateLabel(label) } } if isBidi && p.bidirule != nil && err == nil { for labels.reset(); !labels.done(); labels.next() { if !p.bidirule(labels.label()) { err = &labelError{s, "B"} break } } } if toASCII { for labels.reset(); !labels.done(); labels.next() { label := labels.label() if !ascii(label) { a, err2 := encode(acePrefix, label) if err == nil { err = err2 } label = a labels.set(a) } n := len(label) if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { err = &labelError{label, "A4"} } } } s = labels.result() if toASCII && p.verifyDNSLength && err == nil { // Compute the length of the domain name minus the root label and its dot.
n := len(s) if n > 0 && s[n-1] == '.' { n-- } if len(s) < 1 || n > 253 { err = &labelError{s, "A4"} } } return s, err }
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) { // TODO: consider first doing a quick check to see if any of these checks
// need to be done. This will make it slower in the general case, but
// faster in the common case.
mapped = norm.NFC.String(s) isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft return mapped, isBidi, nil }
func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) { // TODO: filter need for normalization in loop below.
if !norm.NFC.IsNormalString(s) { return s, false, &labelError{s, "V1"} } for i := 0; i < len(s); { v, sz := trie.lookupString(s[i:]) if sz == 0 { return s, bidi, runeError(utf8.RuneError) } bidi = bidi || info(v).isBidi(s[i:]) // Copy bytes not copied so far.
switch p.simplify(info(v).category()) { // TODO: handle the NV8 defined in the Unicode idna data set to allow
// for strict conformance to IDNA2008.
case valid, deviation: case disallowed, mapped, unknown, ignored: r, _ := utf8.DecodeRuneInString(s[i:]) return s, bidi, runeError(r) } i += sz } return s, bidi, nil }
func (c info) isBidi(s string) bool { if !c.isMapped() { return c&attributesMask == rtl } // TODO: also store bidi info for mapped data. This is possible, but a bit
// cumbersome and not for the common case.
p, _ := bidi.LookupString(s) switch p.Class() { case bidi.R, bidi.AL, bidi.AN: return true } return false }
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) { var ( b []byte k int ) // combinedInfoBits contains the or-ed bits of all runes. We use this
// to derive the mayNeedNorm bit later. This may trigger normalization
// overeagerly, but it will not do so in the common case. The end result
// is another 10% saving on BenchmarkProfile for the common case.
var combinedInfoBits info for i := 0; i < len(s); { v, sz := trie.lookupString(s[i:]) if sz == 0 { b = append(b, s[k:i]...) b = append(b, "\ufffd"...) k = len(s) if err == nil { err = runeError(utf8.RuneError) } break } combinedInfoBits |= info(v) bidi = bidi || info(v).isBidi(s[i:]) start := i i += sz // Copy bytes not copied so far.
switch p.simplify(info(v).category()) { case valid: continue case disallowed: if err == nil { r, _ := utf8.DecodeRuneInString(s[start:]) err = runeError(r) } continue case mapped, deviation: b = append(b, s[k:start]...) b = info(v).appendMapping(b, s[start:i]) case ignored: b = append(b, s[k:start]...) // drop the rune
case unknown: b = append(b, s[k:start]...) b = append(b, "\ufffd"...) } k = i } if k == 0 { // No changes so far.
if combinedInfoBits&mayNeedNorm != 0 { s = norm.NFC.String(s) } } else { b = append(b, s[k:]...) if norm.NFC.QuickSpan(b) != len(b) { b = norm.NFC.Bytes(b) } // TODO: the punycode converters require strings as input.
s = string(b) } return s, bidi, err }
// A labelIter allows iterating over domain name labels.
type labelIter struct { orig string slice []string curStart int curEnd int i int }
func (l *labelIter) reset() { l.curStart = 0 l.curEnd = 0 l.i = 0 }
func (l *labelIter) done() bool { return l.curStart >= len(l.orig) }
func (l *labelIter) result() string { if l.slice != nil { return strings.Join(l.slice, ".") } return l.orig }
func (l *labelIter) label() string { if l.slice != nil { return l.slice[l.i] } p := strings.IndexByte(l.orig[l.curStart:], '.') l.curEnd = l.curStart + p if p == -1 { l.curEnd = len(l.orig) } return l.orig[l.curStart:l.curEnd] }
// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() { l.i++ if l.slice != nil { if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" { l.curStart = len(l.orig) } } else { l.curStart = l.curEnd + 1 if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' { l.curStart = len(l.orig) } } }
func (l *labelIter) set(s string) { if l.slice == nil { l.slice = strings.Split(l.orig, ".") } l.slice[l.i] = s }
// acePrefix is the ASCII Compatible Encoding prefix.
const acePrefix = "xn--"
func (p *Profile) simplify(cat category) category { switch cat { case disallowedSTD3Mapped: if p.useSTD3Rules { cat = disallowed } else { cat = mapped } case disallowedSTD3Valid: if p.useSTD3Rules { cat = disallowed } else { cat = valid } case deviation: if !p.transitional { cat = valid } case validNV8, validXV8: // TODO: handle V2008
cat = valid } return cat }
func validateFromPunycode(p *Profile, s string) error { if !norm.NFC.IsNormalString(s) { return &labelError{s, "V1"} } // TODO: detect whether string may have to be normalized in the following
// loop.
for i := 0; i < len(s); { v, sz := trie.lookupString(s[i:]) if sz == 0 { return runeError(utf8.RuneError) } if c := p.simplify(info(v).category()); c != valid && c != deviation { return &labelError{s, "V6"} } i += sz } return nil }
const ( zwnj = "\u200c" zwj = "\u200d" )
type joinState int8
const ( stateStart joinState = iota stateVirama stateBefore stateBeforeVirama stateAfter stateFAIL )
var joinStates = [][numJoinTypes]joinState{ stateStart: { joiningL: stateBefore, joiningD: stateBefore, joinZWNJ: stateFAIL, joinZWJ: stateFAIL, joinVirama: stateVirama, }, stateVirama: { joiningL: stateBefore, joiningD: stateBefore, }, stateBefore: { joiningL: stateBefore, joiningD: stateBefore, joiningT: stateBefore, joinZWNJ: stateAfter, joinZWJ: stateFAIL, joinVirama: stateBeforeVirama, }, stateBeforeVirama: { joiningL: stateBefore, joiningD: stateBefore, joiningT: stateBefore, }, stateAfter: { joiningL: stateFAIL, joiningD: stateBefore, joiningT: stateAfter, joiningR: stateStart, joinZWNJ: stateFAIL, joinZWJ: stateFAIL, joinVirama: stateAfter, // no-op as we can't accept joiners here
}, stateFAIL: { 0: stateFAIL, joiningL: stateFAIL, joiningD: stateFAIL, joiningT: stateFAIL, joiningR: stateFAIL, joinZWNJ: stateFAIL, joinZWJ: stateFAIL, joinVirama: stateFAIL, }, }
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
func (p *Profile) validateLabel(s string) (err error) { if s == "" { if p.verifyDNSLength { return &labelError{s, "A4"} } return nil } if !p.validateLabels { return nil } trie := p.trie // p.validateLabels is only set if trie is set.
if len(s) > 4 && s[2] == '-' && s[3] == '-' { return &labelError{s, "V2"} } if s[0] == '-' || s[len(s)-1] == '-' { return &labelError{s, "V3"} } // TODO: merge the use of this in the trie.
v, sz := trie.lookupString(s) x := info(v) if x.isModifier() { return &labelError{s, "V5"} } // Quickly return in the absence of zero-width (non) joiners.
if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { return nil } st := stateStart for i := 0; ; { jt := x.joinType() if s[i:i+sz] == zwj { jt = joinZWJ } else if s[i:i+sz] == zwnj { jt = joinZWNJ } st = joinStates[st][jt] if x.isViramaModifier() { st = joinStates[st][joinVirama] } if i += sz; i == len(s) { break } v, sz = trie.lookupString(s[i:]) x = info(v) } if st == stateFAIL || st == stateAfter { return &labelError{s, "C"} } return nil }
func ascii(s string) bool { for i := 0; i < len(s); i++ { if s[i] >= utf8.RuneSelf { return false } } return true }
|