You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

1394 lines
63 KiB

/**
* To validate an email address according to RFCs 5321, 5322 and others
*
* Copyright © 2008-2011, Dominic Sayers
* Test schema documentation Copyright © 2011, Daniel Marschall
* Port for Node.js Copyright © 2013-2014, GlobeSherpa
* and Copyright © 2014-2015, Eli Skeggs
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of Dominic Sayers nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @author Dominic Sayers <dominic@sayers.cc>
* @author Eli Skeggs <skeggse@gmail.com>
* @copyright 2008-2011 Dominic Sayers
* @copyright 2013-2014 GlobeSherpa
* @copyright 2014-2015 Eli Skeggs
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @link http://www.dominicsayers.com/isemail
* @link https://github.com/hapijs/isemail
* @version 1.2.0 Drop Node 0.8, fix style, switch to lab/code
*/
var Dns = require('dns');
var internals = {
defaultThreshold: 16,
maxIPv6Groups: 8,
categories: {
valid: 1,
dnsWarn: 7,
rfc5321: 15,
cfws: 31,
deprecated: 63,
rfc5322: 127,
error: 255
},
diagnoses: {
// Address is valid
valid: 0,
// Address is valid, but the DNS check failed
dnsWarnNoMXRecord: 5,
dnsWarnNoRecord: 6,
// Address is valid for SMTP but has unusual elements
rfc5321TLD: 9,
rfc5321TLDNumeric: 10,
rfc5321QuotedString: 11,
rfc5321AddressLiteral: 12,
// Address is valid for message, but must be modified for envelope
cfwsComment: 17,
cfwsFWS: 18,
// Address contains deprecated elements, but may still be valid in some contexts
deprecatedLocalPart: 33,
deprecatedFWS: 34,
deprecatedQTEXT: 35,
deprecatedQP: 36,
deprecatedComment: 37,
deprecatedCTEXT: 38,
deprecatedIPv6: 39,
deprecatedCFWSNearAt: 49,
// Address is only valid according to broad definition in RFC 5322, but is otherwise invalid
rfc5322Domain: 65,
rfc5322TooLong: 66,
rfc5322LocalTooLong: 67,
rfc5322DomainTooLong: 68,
rfc5322LabelTooLong: 69,
rfc5322DomainLiteral: 70,
rfc5322DomainLiteralOBSDText: 71,
rfc5322IPv6GroupCount: 72,
rfc5322IPv62x2xColon: 73,
rfc5322IPv6BadCharacter: 74,
rfc5322IPv6MaxGroups: 75,
rfc5322IPv6ColonStart: 76,
rfc5322IPv6ColonEnd: 77,
// Address is invalid for any purpose
errExpectingDTEXT: 129,
errNoLocalPart: 130,
errNoDomain: 131,
errConsecutiveDots: 132,
errATEXTAfterCFWS: 133,
errATEXTAfterQS: 134,
errATEXTAfterDomainLiteral: 135,
errExpectingQPair: 136,
errExpectingATEXT: 137,
errExpectingQTEXT: 138,
errExpectingCTEXT: 139,
errBackslashEnd: 140,
errDotStart: 141,
errDotEnd: 142,
errDomainHyphenStart: 143,
errDomainHyphenEnd: 144,
errUnclosedQuotedString: 145,
errUnclosedComment: 146,
errUnclosedDomainLiteral: 147,
errFWSCRLFx2: 148,
errFWSCRLFEnd: 149,
errCRNoLF: 150,
errUnknownTLD: 160,
errDomainTooShort: 161
},
components: {
localpart: 0,
domain: 1,
literal: 2,
contextComment: 3,
contextFWS: 4,
contextQuotedString: 5,
contextQuotedPair: 6
}
};
// $lab:coverage:off$
internals.defer = typeof process !== 'undefined' && process && typeof process.nextTick === 'function' ?
process.nextTick.bind(process) :
function (callback) {
return setTimeout(callback, 0);
};
// $lab:coverage:on$
// US-ASCII visible characters not valid for atext
// (http://tools.ietf.org/html/rfc5322#section-3.2.3)
var SPECIALS = '()<>[]:;@\\,."';
// A silly little optimized function generator
var optimizeLookup = function optimizeLookup (string) {
var lookup = new Array(0x100);
for (var i = 0xff; i >= 0; --i) {
lookup[i] = false;
}
for (var il = string.length; i < il; ++i) {
lookup[string.charCodeAt(i)] = true;
}
var body = 'return function (code) {\n';
body += ' return lookup[code];\n';
body += '}';
return (new Function('lookup', body))(lookup);
};
var specialsLookup = optimizeLookup(SPECIALS);
// This matches valid IPv4 addresses from the end of a string
var IPv4_REGEX =
/\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$/;
var IPv6_REGEX = /^[a-fA-F\d]{0,4}$/;
var IPv6_REGEX_TEST = IPv6_REGEX.test.bind(IPv6_REGEX);
var hasOwn = Object.prototype.hasOwnProperty;
/**
* Check that an email address conforms to RFCs 5321, 5322 and others
*
* We distinguish clearly between a Mailbox as defined by RFC 5321 and an
* addr-spec as defined by RFC 5322. Depending on the context, either can be
* regarded as a valid email address. The RFC 5321 Mailbox specification is
* more restrictive (comments, white space and obsolete forms are not allowed).
*
* @param {string} email The email address to check.
* @param {Object} options The (optional) options:
* {boolean} checkDNS If true then will check DNS for MX records. If
* true this call to isEmail _will_ be asynchronous.
* {*} errorLevel Determines the boundary between valid and invalid
* addresses. Status codes above this number will be returned as-is, status
* codes below will be returned as valid. Thus the calling program can
* simply look for diagnoses.valid if it is only interested in whether an
* address is valid or not. The errorLevel will determine how "picky"
* isEmail() is about the address. If omitted or passed as false then
* isEmail() will return true or false rather than an integer error or
* warning. NB Note the difference between errorLevel = false and
* errorLevel = 0.
* @param {function(number|boolean)} callback The (optional) callback handler.
* @return {*}
*/
var isEmail = function isEmail (email, options, callback) {
if (typeof options === 'function') {
callback = options;
options = {};
}
if (!options) {
options = {};
}
if (typeof callback !== 'function') {
if (options.checkDNS) {
throw new TypeError('expected callback function for checkDNS option');
}
callback = null;
}
var diagnose;
var threshold;
if (typeof options.errorLevel === 'number') {
diagnose = true;
threshold = options.errorLevel;
}
else {
diagnose = !!options.errorLevel;
threshold = internals.diagnoses.valid;
}
if (options.tldWhitelist) {
if (typeof options.tldWhitelist === 'string') {
options.tldWhitelist = [options.tldWhitelist];
} else if (typeof options.tldWhitelist !== 'object') {
throw new TypeError('expected array or object tldWhitelist');
}
}
if (options.minDomainAtoms && (options.minDomainAtoms !== ((+options.minDomainAtoms) | 0) || options.minDomainAtoms < 0)) {
throw new TypeError('expected positive integer minDomainAtoms');
}
var maxResult = internals.diagnoses.valid;
var updateResult = function updateResult (value) {
if (value > maxResult) {
maxResult = value;
}
};
var context = {
now: internals.components.localpart,
prev: internals.components.localpart,
stack: [internals.components.localpart]
};
var token;
var prevToken = '';
var charCode = 0;
var parseData = {
local: '',
domain: ''
};
var atomData = {
locals: [''],
domains: ['']
};
var elementCount = 0;
var elementLength = 0;
var crlfCount = 0;
var hyphenFlag = false;
var assertEnd = false;
var emailLength = email.length;
for (var i = 0, il = emailLength; i < il; ++i) {
// Token is used outside the loop, must declare similarly
token = email[i];
switch (context.now) {
// Local-part
case internals.components.localpart:
// http://tools.ietf.org/html/rfc5322#section-3.4.1
// local-part = dot-atom / quoted-string / obs-local-part
//
// dot-atom = [CFWS] dot-atom-text [CFWS]
//
// dot-atom-text = 1*atext *("." 1*atext)
//
// quoted-string = [CFWS]
// DQUOTE *([FWS] qcontent) [FWS] DQUOTE
// [CFWS]
//
// obs-local-part = word *("." word)
//
// word = atom / quoted-string
//
// atom = [CFWS] 1*atext [CFWS]
switch (token) {
// Comment
case '(':
if (elementLength === 0) {
// Comments are OK at the beginning of an element
updateResult(elementCount === 0 ? internals.diagnoses.cfwsComment : internals.diagnoses.deprecatedComment);
}
else {
updateResult(internals.diagnoses.cfwsComment);
// Cannot start a comment in an element, should be end
assertEnd = true;
}
context.stack.push(context.now);
context.now = internals.components.contextComment;
break;
// Next dot-atom element
case '.':
if (elementLength === 0) {
// Another dot, already?
updateResult(elementCount === 0 ? internals.diagnoses.errDotStart : internals.diagnoses.errConsecutiveDots);
}
else {
// The entire local-part can be a quoted string for RFC 5321; if one atom is quoted it's an RFC 5322 obsolete form
if (assertEnd) {
updateResult(internals.diagnoses.deprecatedLocalPart);
}
// CFWS & quoted strings are OK again now we're at the beginning of an element (although they are obsolete forms)
assertEnd = false;
elementLength = 0;
++elementCount;
parseData.local += token;
atomData.locals[elementCount] = '';
}
break;
// Quoted string
case '"':
if (elementLength === 0) {
// The entire local-part can be a quoted string for RFC 5321; if one atom is quoted it's an RFC 5322 obsolete form
updateResult(elementCount === 0 ? internals.diagnoses.rfc5321QuotedString : internals.diagnoses.deprecatedLocalPart);
parseData.local += token;
atomData.locals[elementCount] += token;
++elementLength;
// Quoted string must be the entire element
assertEnd = true;
context.stack.push(context.now);
context.now = internals.components.contextQuotedString;
}
else {
updateResult(internals.diagnoses.errExpectingATEXT);
}
break;
// Folding white space
case '\r':
if (emailLength === ++i || email[i] !== '\n') {
// Fatal error
updateResult(internals.diagnoses.errCRNoLF);
break;
}
// Fallthrough
case ' ':
case '\t':
if (elementLength === 0) {
updateResult(elementCount === 0 ? internals.diagnoses.cfwsFWS : internals.diagnoses.deprecatedFWS);
}
else {
// We can't start FWS in the middle of an element, better be end
assertEnd = true;
}
context.stack.push(context.now);
context.now = internals.components.contextFWS;
prevToken = token;
break;
case '@':
// At this point we should have a valid local-part
// $lab:coverage:off$
if (context.stack.length !== 1) {
throw new Error('unexpected item on context stack');
}
// $lab:coverage:on$
if (parseData.local.length === 0) {
// Fatal error
updateResult(internals.diagnoses.errNoLocalPart);
}
else if (elementLength === 0) {
// Fatal error
updateResult(internals.diagnoses.errDotEnd);
}
// http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1 the maximum total length of a user name or other local-part is 64
// octets
else if (parseData.local.length > 64) {
updateResult(internals.diagnoses.rfc5322LocalTooLong);
}
// http://tools.ietf.org/html/rfc5322#section-3.4.1 comments and folding white space SHOULD NOT be used around "@" in the
// addr-spec
//
// http://tools.ietf.org/html/rfc2119
// 4. SHOULD NOT this phrase, or the phrase "NOT RECOMMENDED" mean that there may exist valid reasons in particular
// circumstances when the particular behavior is acceptable or even useful, but the full implications should be understood
// and the case carefully weighed before implementing any behavior described with this label.
else if (context.prev === internals.components.contextComment || context.prev === internals.components.contextFWS) {
updateResult(internals.diagnoses.deprecatedCFWSNearAt);
}
// Clear everything down for the domain parsing
context.now = internals.components.domain;
context.stack[0] = internals.components.domain;
elementCount = 0;
elementLength = 0;
assertEnd = false; // CFWS can only appear at the end of the element
break;
// ATEXT
default:
// http://tools.ietf.org/html/rfc5322#section-3.2.3
// atext = ALPHA / DIGIT / ; Printable US-ASCII
// "!" / "#" / ; characters not including
// "$" / "%" / ; specials. Used for atoms.
// "&" / "'" /
// "*" / "+" /
// "-" / "/" /
// "=" / "?" /
// "^" / "_" /
// "`" / "{" /
// "|" / "}" /
// "~"
if (assertEnd) {
// We have encountered atext where it is no longer valid
switch (context.prev) {
case internals.components.contextComment:
case internals.components.contextFWS:
updateResult(internals.diagnoses.errATEXTAfterCFWS);
break;
case internals.components.contextQuotedString:
updateResult(internals.diagnoses.errATEXTAfterQS);
break;
// $lab:coverage:off$
default:
throw new Error('more atext found where none is allowed, but unrecognized prev context: ' + context.prev);
// $lab:coverage:on$
}
}
else {
context.prev = context.now;
charCode = token.charCodeAt(0);
// Especially if charCode == 10
if (charCode < 33 || charCode > 126 || specialsLookup(charCode)) {
// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
}
parseData.local += token;
atomData.locals[elementCount] += token;
++elementLength;
}
}
break;
case internals.components.domain:
// http://tools.ietf.org/html/rfc5322#section-3.4.1
// domain = dot-atom / domain-literal / obs-domain
//
// dot-atom = [CFWS] dot-atom-text [CFWS]
//
// dot-atom-text = 1*atext *("." 1*atext)
//
// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
//
// dtext = %d33-90 / ; Printable US-ASCII
// %d94-126 / ; characters not including
// obs-dtext ; "[", "]", or "\"
//
// obs-domain = atom *("." atom)
//
// atom = [CFWS] 1*atext [CFWS]
// http://tools.ietf.org/html/rfc5321#section-4.1.2
// Mailbox = Local-part "@" ( Domain / address-literal )
//
// Domain = sub-domain *("." sub-domain)
//
// address-literal = "[" ( IPv4-address-literal /
// IPv6-address-literal /
// General-address-literal ) "]"
// ; See Section 4.1.3
// http://tools.ietf.org/html/rfc5322#section-3.4.1
// Note: A liberal syntax for the domain portion of addr-spec is
// given here. However, the domain portion contains addressing
// information specified by and used in other protocols (e.g.,
// [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
// incumbent upon implementations to conform to the syntax of
// addresses for the context in which they are used.
//
// is_email() author's note: it's not clear how to interpret this in
// he context of a general email address validator. The conclusion I
// have reached is this: "addressing information" must comply with
// RFC 5321 (and in turn RFC 1035), anything that is "semantically
// invisible" must comply only with RFC 5322.
switch (token) {
// Comment
case '(':
if (elementLength === 0) {
// Comments at the start of the domain are deprecated in the text, comments at the start of a subdomain are obs-domain
// http://tools.ietf.org/html/rfc5322#section-3.4.1
updateResult(elementCount === 0 ? internals.diagnoses.deprecatedCFWSNearAt : internals.diagnoses.deprecatedComment);
}
else {
// We can't start a comment mid-element, better be at the end
assertEnd = true;
updateResult(internals.diagnoses.cfwsComment);
}
context.stack.push(context.now);
context.now = internals.components.contextComment;
break;
// Next dot-atom element
case '.':
if (elementLength === 0) {
// Another dot, already? Fatal error.
updateResult(elementCount === 0 ? internals.diagnoses.errDotStart : internals.diagnoses.errConsecutiveDots);
}
else if (hyphenFlag) {
// Previous subdomain ended in a hyphen. Fatal error.
updateResult(internals.diagnoses.errDomainHyphenEnd);
}
else if (elementLength > 63) {
// Nowhere in RFC 5321 does it say explicitly that the domain part of a Mailbox must be a valid domain according to the
// DNS standards set out in RFC 1035, but this *is* implied in several places. For instance, wherever the idea of host
// routing is discussed the RFC says that the domain must be looked up in the DNS. This would be nonsense unless the
// domain was designed to be a valid DNS domain. Hence we must conclude that the RFC 1035 restriction on label length
// also applies to RFC 5321 domains.
//
// http://tools.ietf.org/html/rfc1035#section-2.3.4
// labels 63 octets or less
updateResult(internals.diagnoses.rfc5322LabelTooLong);
}
// CFWS is OK again now we're at the beginning of an element (although
// it may be obsolete CFWS)
assertEnd = false;
elementLength = 0;
++elementCount;
atomData.domains[elementCount] = '';
parseData.domain += token;
break;
// Domain literal
case '[':
if (parseData.domain.length === 0) {
// Domain literal must be the only component
assertEnd = true;
++elementLength;
context.stack.push(context.now);
context.now = internals.components.literal;
parseData.domain += token;
atomData.domains[elementCount] += token;
parseData.literal = '';
}
else {
// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
}
break;
// Folding white space
case '\r':
if (emailLength === ++i || email[i] !== '\n') {
// Fatal error
updateResult(internals.diagnoses.errCRNoLF);
break;
}
// Fallthrough
case ' ':
case '\t':
if (elementLength === 0) {
updateResult(elementCount === 0 ? internals.diagnoses.deprecatedCFWSNearAt : internals.diagnoses.deprecatedFWS);
}
else {
// We can't start FWS in the middle of an element, so this better be the end
updateResult(internals.diagnoses.cfwsFWS);
assertEnd = true;
}
context.stack.push(context.now);
context.now = internals.components.contextFWS;
prevToken = token;
break;
// This must be ATEXT
default:
// RFC 5322 allows any atext...
// http://tools.ietf.org/html/rfc5322#section-3.2.3
// atext = ALPHA / DIGIT / ; Printable US-ASCII
// "!" / "#" / ; characters not including
// "$" / "%" / ; specials. Used for atoms.
// "&" / "'" /
// "*" / "+" /
// "-" / "/" /
// "=" / "?" /
// "^" / "_" /
// "`" / "{" /
// "|" / "}" /
// "~"
// But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules
// (RFCs 1034 & 1123)
// http://tools.ietf.org/html/rfc5321#section-4.1.2
// sub-domain = Let-dig [Ldh-str]
//
// Let-dig = ALPHA / DIGIT
//
// Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
//
if (assertEnd) {
// We have encountered ATEXT where it is no longer valid
switch (context.prev) {
case internals.components.contextComment:
case internals.components.contextFWS:
updateResult(internals.diagnoses.errATEXTAfterCFWS);
break;
case internals.components.literal:
updateResult(internals.diagnoses.errATEXTAfterDomainLiteral);
break;
// $lab:coverage:off$
default:
throw new Error('more atext found where none is allowed, but unrecognized prev context: ' + context.prev);
// $lab:coverage:on$
}
}
charCode = token.charCodeAt(0);
// Assume this token isn't a hyphen unless we discover it is
hyphenFlag = false;
if (charCode < 33 || charCode > 126 || specialsLookup(charCode)) {
// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
}
else if (token === '-') {
if (elementLength === 0) {
// Hyphens cannot be at the beginning of a subdomain, fatal error
updateResult(internals.diagnoses.errDomainHyphenStart);
}
hyphenFlag = true;
}
// Check if it's a neither a number nor a latin letter
else if (charCode < 48 || charCode > 122 || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) {
// This is not an RFC 5321 subdomain, but still OK by RFC 5322
updateResult(internals.diagnoses.rfc5322Domain);
}
parseData.domain += token;
atomData.domains[elementCount] += token;
++elementLength;
}
break;
// Domain literal
case internals.components.literal:
// http://tools.ietf.org/html/rfc5322#section-3.4.1
// domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
//
// dtext = %d33-90 / ; Printable US-ASCII
// %d94-126 / ; characters not including
// obs-dtext ; "[", "]", or "\"
//
// obs-dtext = obs-NO-WS-CTL / quoted-pair
switch (token) {
// End of domain literal
case ']':
if (maxResult < internals.categories.deprecated) {
// Could be a valid RFC 5321 address literal, so let's check
// http://tools.ietf.org/html/rfc5321#section-4.1.2
// address-literal = "[" ( IPv4-address-literal /
// IPv6-address-literal /
// General-address-literal ) "]"
// ; See Section 4.1.3
//
// http://tools.ietf.org/html/rfc5321#section-4.1.3
// IPv4-address-literal = Snum 3("." Snum)
//
// IPv6-address-literal = "IPv6:" IPv6-addr
//
// General-address-literal = Standardized-tag ":" 1*dcontent
//
// Standardized-tag = Ldh-str
// ; Standardized-tag MUST be specified in a
// ; Standards-Track RFC and registered with IANA
//
// dcontent = %d33-90 / ; Printable US-ASCII
// %d94-126 ; excl. "[", "\", "]"
//
// Snum = 1*3DIGIT
// ; representing a decimal integer
// ; value in the range 0 through 255
//
// IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
//
// IPv6-hex = 1*4HEXDIG
//
// IPv6-full = IPv6-hex 7(":" IPv6-hex)
//
// IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
// [IPv6-hex *5(":" IPv6-hex)]
// ; The "::" represents at least 2 16-bit groups of
// ; zeros. No more than 6 groups in addition to the
// ; "::" may be present.
//
// IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
//
// IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
// [IPv6-hex *3(":" IPv6-hex) ":"]
// IPv4-address-literal
// ; The "::" represents at least 2 16-bit groups of
// ; zeros. No more than 4 groups in addition to the
// ; "::" and IPv4-address-literal may be present.
var index = -1;
var addressLiteral = parseData.literal;
var matchesIP = IPv4_REGEX.exec(addressLiteral);
// Maybe extract IPv4 part from the end of the address-literal
if (matchesIP) {
index = matchesIP.index;
if (index !== 0) {
// Convert IPv4 part to IPv6 format for futher testing
addressLiteral = addressLiteral.slice(0, index) + '0:0';
}
}
if (index === 0) {
// Nothing there except a valid IPv4 address, so...
updateResult(internals.diagnoses.rfc5321AddressLiteral);
}
else if (addressLiteral.slice(0, 5).toLowerCase() !== 'ipv6:') {
updateResult(internals.diagnoses.rfc5322DomainLiteral);
}
else {
var match = addressLiteral.slice(5);
var maxGroups = internals.maxIPv6Groups;
var groups = match.split(':');
index = match.indexOf('::');
if (!~index) {
// Need exactly the right number of groups
if (groups.length !== maxGroups) {
updateResult(internals.diagnoses.rfc5322IPv6GroupCount);
}
}
else if (index !== match.lastIndexOf('::')) {
updateResult(internals.diagnoses.rfc5322IPv62x2xColon);
}
else {
if (index === 0 || index === match.length - 2) {
// RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
++maxGroups;
}
if (groups.length > maxGroups) {
updateResult(internals.diagnoses.rfc5322IPv6MaxGroups);
}
else if (groups.length === maxGroups) {
// Eliding a single "::"
updateResult(internals.diagnoses.deprecatedIPv6);
}
}
// IPv6 testing strategy
if (match[0] === ':' && match[1] !== ':') {
updateResult(internals.diagnoses.rfc5322IPv6ColonStart);
}
else if (match[match.length - 1] === ':' && match[match.length - 2] !== ':') {
updateResult(internals.diagnoses.rfc5322IPv6ColonEnd);
}
else if (groups.every(IPv6_REGEX_TEST)) {
updateResult(internals.diagnoses.rfc5321AddressLiteral);
}
else {
updateResult(internals.diagnoses.rfc5322IPv6BadCharacter);
}
}
}
else {
updateResult(internals.diagnoses.rfc5322DomainLiteral);
}
parseData.domain += token;
atomData.domains[elementCount] += token;
++elementLength;
context.prev = context.now;
context.now = context.stack.pop();
break;
case '\\':
updateResult(internals.diagnoses.rfc5322DomainLiteralOBSDText);
context.stack.push(context.now);
context.now = internals.components.contextQuotedPair;
break;
// Folding white space
case '\r':
if (emailLength === ++i || email[i] !== '\n') {
updateResult(internals.diagnoses.errCRNoLF);
break;
}
// Fallthrough
case ' ':
case '\t':
updateResult(internals.diagnoses.cfwsFWS);
context.stack.push(context.now);
context.now = internals.components.contextFWS;
prevToken = token;
break;
// DTEXT
default:
// http://tools.ietf.org/html/rfc5322#section-3.4.1
// dtext = %d33-90 / ; Printable US-ASCII
// %d94-126 / ; characters not including
// obs-dtext ; "[", "]", or "\"
//
// obs-dtext = obs-NO-WS-CTL / quoted-pair
//
// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
// %d11 / ; characters that do not
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
// '\r', '\n', ' ', and '\t' have already been parsed above
if (charCode > 127 || charCode === 0 || token === '[') {
// Fatal error
updateResult(internals.diagnoses.errExpectingDTEXT);
break;
}
else if (charCode < 33 || charCode === 127) {
updateResult(internals.diagnoses.rfc5322DomainLiteralOBSDText);
}
parseData.literal += token;
parseData.domain += token;
atomData.domains[elementCount] += token;
++elementLength;
}
break;
// Quoted string
case internals.components.contextQuotedString:
// http://tools.ietf.org/html/rfc5322#section-3.2.4
// quoted-string = [CFWS]
// DQUOTE *([FWS] qcontent) [FWS] DQUOTE
// [CFWS]
//
// qcontent = qtext / quoted-pair
switch (token) {
// Quoted pair
case '\\':
context.stack.push(context.now);
context.now = internals.components.contextQuotedPair;
break;
// Folding white space. Spaces are allowed as regular characters inside a quoted string - it's only FWS if we include '\t' or '\r\n'
case '\r':
if (emailLength === ++i || email[i] !== '\n') {
// Fatal error
updateResult(internals.diagnoses.errCRNoLF);
break;
}
// Fallthrough
case '\t':
// http://tools.ietf.org/html/rfc5322#section-3.2.2
// Runs of FWS, comment, or CFWS that occur between lexical tokens in
// a structured header field are semantically interpreted as a single
// space character.
// http://tools.ietf.org/html/rfc5322#section-3.2.4
// the CRLF in any FWS/CFWS that appears within the quoted-string [is]
// semantically "invisible" and therefore not part of the
// quoted-string
parseData.local += ' ';
atomData.locals[elementCount] += ' ';
++elementLength;
updateResult(internals.diagnoses.cfwsFWS);
context.stack.push(context.now);
context.now = internals.components.contextFWS;
prevToken = token;
break;
// End of quoted string
case '"':
parseData.local += token;
atomData.locals[elementCount] += token;
++elementLength;
context.prev = context.now;
context.now = context.stack.pop();
break;
// QTEXT
default:
// http://tools.ietf.org/html/rfc5322#section-3.2.4
// qtext = %d33 / ; Printable US-ASCII
// %d35-91 / ; characters not including
// %d93-126 / ; "\" or the quote character
// obs-qtext
//
// obs-qtext = obs-NO-WS-CTL
//
// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
// %d11 / ; characters that do not
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
if (charCode > 127 || charCode === 0 || charCode === 10) {
updateResult(internals.diagnoses.errExpectingQTEXT);
}
else if (charCode < 32 || charCode === 127) {
updateResult(internals.diagnoses.deprecatedQTEXT);
}
parseData.local += token;
atomData.locals[elementCount] += token;
++elementLength;
}
// http://tools.ietf.org/html/rfc5322#section-3.4.1
// If the string can be represented as a dot-atom (that is, it contains
// no characters other than atext characters or "." surrounded by atext
// characters), then the dot-atom form SHOULD be used and the quoted-
// string form SHOULD NOT be used.
break;
// Quoted pair
case internals.components.contextQuotedPair:
// http://tools.ietf.org/html/rfc5322#section-3.2.1
// quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
//
// VCHAR = %d33-126 ; visible (printing) characters
// WSP = SP / HTAB ; white space
//
// obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
//
// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
// %d11 / ; characters that do not
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
//
// i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
charCode = token.charCodeAt(0);
if (charCode > 127) {
// Fatal error
updateResult(internals.diagnoses.errExpectingQPair);
}
else if ((charCode < 31 && charCode !== 9) || charCode === 127) {
// ' ' and '\t' are allowed
updateResult(internals.diagnoses.deprecatedQP);
}
// At this point we know where this qpair occurred so we could check to see if the character actually needed to be quoted at all.
// http://tools.ietf.org/html/rfc5321#section-4.1.2
// the sending system SHOULD transmit the form that uses the minimum quoting possible.
context.prev = context.now;
// End of qpair
context.now = context.stack.pop();
token = '\\' + token;
switch (context.now) {
case internals.components.contextComment:
break;
case internals.components.contextQuotedString:
parseData.local += token;
atomData.locals[elementCount] += token;
// The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
elementLength += 2;
break;
case internals.components.literal:
parseData.domain += token;
atomData.domains[elementCount] += token;
// The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
elementLength += 2;
break;
// $lab:coverage:off$
default:
throw new Error('quoted pair logic invoked in an invalid context: ' + context.now);
// $lab:coverage:on$
}
break;
// Comment
case internals.components.contextComment:
// http://tools.ietf.org/html/rfc5322#section-3.2.2
// comment = "(" *([FWS] ccontent) [FWS] ")"
//
// ccontent = ctext / quoted-pair / comment
switch (token) {
// Nested comment
case '(':
// Nested comments are ok
context.stack.push(context.now);
context.now = internals.components.contextComment;
break;
// End of comment
case ')':
context.prev = context.now;
context.now = context.stack.pop();
break;
// Quoted pair
case '\\':
context.stack.push(context.now);
context.now = internals.components.contextQuotedPair;
break;
// Folding white space
case '\r':
if (emailLength === ++i || email[i] !== '\n') {
// Fatal error
updateResult(internals.diagnoses.errCRNoLF);
break;
}
// Fallthrough
case ' ':
case '\t':
updateResult(internals.diagnoses.cfwsFWS);
context.stack.push(context.now);
context.now = internals.components.contextFWS;
prevToken = token;
break;
// CTEXT
default:
// http://tools.ietf.org/html/rfc5322#section-3.2.3
// ctext = %d33-39 / ; Printable US-ASCII
// %d42-91 / ; characters not including
// %d93-126 / ; "(", ")", or "\"
// obs-ctext
//
// obs-ctext = obs-NO-WS-CTL
//
// obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
// %d11 / ; characters that do not
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
if (charCode > 127 || charCode === 0 || charCode === 10) {
// Fatal error
updateResult(internals.diagnoses.errExpectingCTEXT);
break;
}
else if (charCode < 32 || charCode === 127) {
updateResult(internals.diagnoses.deprecatedCTEXT);
}
}
break;
// Folding white space
case internals.components.contextFWS:
// http://tools.ietf.org/html/rfc5322#section-3.2.2
// FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
// ; Folding white space
// But note the erratum:
// http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:
// In the obsolete syntax, any amount of folding white space MAY be
// inserted where the obs-FWS rule is allowed. This creates the
// possibility of having two consecutive "folds" in a line, and
// therefore the possibility that a line which makes up a folded header
// field could be composed entirely of white space.
//
// obs-FWS = 1*([CRLF] WSP)
if (prevToken === '\r') {
if (token === '\r') {
// Fatal error
updateResult(internals.diagnoses.errFWSCRLFx2);
break;
}
if (++crlfCount > 1) {
// Multiple folds => obsolete FWS
updateResult(internals.diagnoses.deprecatedFWS);
}
else {
crlfCount = 1;
}
}
switch (token) {
case '\r':
if (emailLength === ++i || email[i] !== '\n') {
// Fatal error
updateResult(internals.diagnoses.errCRNoLF);
}
break;
case ' ':
case '\t':
break;
default:
if (prevToken === '\r') {
// Fatal error
updateResult(internals.diagnoses.errFWSCRLFEnd);
}
crlfCount = 0;
// End of FWS
context.prev = context.now;
context.now = context.stack.pop();
// Look at this token again in the parent context
--i;
}
prevToken = token;
break;
// Unexpected context
// $lab:coverage:off$
default:
throw new Error('unknown context: ' + context.now);
// $lab:coverage:on$
} // Primary state machine
if (maxResult > internals.categories.rfc5322) {
// Fatal error, no point continuing
break;
}
} // Token loop
// Check for errors
if (maxResult < internals.categories.rfc5322) {
// Fatal errors
if (context.now === internals.components.contextQuotedString) {
updateResult(internals.diagnoses.errUnclosedQuotedString);
}
else if (context.now === internals.components.contextQuotedPair) {
updateResult(internals.diagnoses.errBackslashEnd);
}
else if (context.now === internals.components.contextComment) {
updateResult(internals.diagnoses.errUnclosedComment);
}
else if (context.now === internals.components.literal) {
updateResult(internals.diagnoses.errUnclosedDomainLiteral);
}
else if (token === '\r') {
updateResult(internals.diagnoses.errFWSCRLFEnd);
}
else if (parseData.domain.length === 0) {
updateResult(internals.diagnoses.errNoDomain);
}
else if (elementLength === 0) {
updateResult(internals.diagnoses.errDotEnd);
}
else if (hyphenFlag) {
updateResult(internals.diagnoses.errDomainHyphenEnd);
}
// Other errors
else if (parseData.domain.length > 255) {
// http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2
// The maximum total length of a domain name or number is 255 octets.
updateResult(internals.diagnoses.rfc5322DomainTooLong);
}
else if (parseData.local.length + parseData.domain.length + /* '@' */ 1 > 254) {
// http://tools.ietf.org/html/rfc5321#section-4.1.2
// Forward-path = Path
//
// Path = "<" [ A-d-l ":" ] Mailbox ">"
//
// http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3
// The maximum total length of a reverse-path or forward-path is 256 octets (including the punctuation and element separators).
//
// Thus, even without (obsolete) routing information, the Mailbox can only be 254 characters long. This is confirmed by this verified
// erratum to RFC 3696:
//
// http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
// However, there is a restriction in RFC 2821 on the length of an address in MAIL and RCPT commands of 254 characters. Since
// addresses that do not fit in those fields are not normally useful, the upper limit on address lengths should normally be considered
// to be 254.
updateResult(internals.diagnoses.rfc5322TooLong);
}
else if (elementLength > 63) {
// http://tools.ietf.org/html/rfc1035#section-2.3.4
// labels 63 octets or less
updateResult(internals.diagnoses.rfc5322LabelTooLong);
}
else if (options.minDomainAtoms && atomData.domains.length < options.minDomainAtoms) {
updateResult(internals.diagnoses.errDomainTooShort);
}
else if (options.tldWhitelist) {
var tldAtom = atomData.domains[elementCount];
if (Array.isArray(options.tldWhitelist)) {
var tldValid = false;
for (i = 0, il = options.tldWhitelist.length; i < il; ++i) {
if (tldAtom === options.tldWhitelist[i]) {
tldValid = true;
break;
}
}
if (!tldValid) {
updateResult(internals.diagnoses.errUnknownTLD);
}
}
else if (!hasOwn.call(options.tldWhitelist, tldAtom)) {
updateResult(internals.diagnoses.errUnknownTLD);
}
}
} // Check for errors
var dnsPositive = false;
var finishImmediately = false;
var finish = function finish () {
if (!dnsPositive && maxResult < internals.categories.dnsWarn) {
// Per RFC 5321, domain atoms are limited to letter-digit-hyphen, so we only need to check code <= 57 to check for a digit
var code = atomData.domains[elementCount].charCodeAt(0);
if (code <= 57) {
updateResult(internals.diagnoses.rfc5321TLDNumeric);
}
else if (elementCount === 0) {
updateResult(internals.diagnoses.rfc5321TLD);
}
}
if (maxResult < threshold) {
maxResult = internals.diagnoses.valid;
}
var finishResult = diagnose ? maxResult : maxResult < internals.defaultThreshold;
if (callback) {
if (finishImmediately) {
callback(finishResult);
} else {
internals.defer(callback.bind(null, finishResult));
}
}
return finishResult;
}; // Finish
if (options.checkDNS && maxResult < internals.categories.dnsWarn) {
// http://tools.ietf.org/html/rfc5321#section-2.3.5
// Names that can be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed in Section 5) are permitted, as are CNAME RRs whose
// targets can be resolved, in turn, to MX or address RRs.
//
// http://tools.ietf.org/html/rfc5321#section-5.1
// The lookup first attempts to locate an MX record associated with the name. If a CNAME record is found, the resulting name is processed
// as if it were the initial name. ... If an empty list of MXs is returned, the address is treated as if it was associated with an implicit
// MX RR, with a preference of 0, pointing to that host.
//
// isEmail() author's note: We will regard the existence of a CNAME to be sufficient evidence of the domain's existence. For performance
// reasons we will not repeat the DNS lookup for the CNAME's target, but we will raise a warning because we didn't immediately find an MX
// record.
if (elementCount === 0) {
// Checking TLD DNS only works if you explicitly check from the root
parseData.domain += '.';
}
var dnsDomain = parseData.domain;
Dns.resolveMx(dnsDomain, function resolveDNS (err, mxRecords) {
// If we have a fatal error, then we must assume that there are no records
if (err && err.code !== Dns.NODATA) {
updateResult(internals.diagnoses.dnsWarnNoRecord);
return finish();
}
if (mxRecords && mxRecords.length) {
dnsPositive = true;
return finish();
}
var count = 3;
var done = false;
updateResult(internals.diagnoses.dnsWarnNoMXRecord);
var handleRecords = function handleRecords (err, records) {
if (done) {
return;
}
--count;
if (records && records.length) {
done = true;
return finish();
}
if (count === 0) {
// No usable records for the domain can be found
updateResult(internals.diagnoses.dnsWarnNoRecord);
done = true;
finish();
}
};
Dns.resolveCname(dnsDomain, handleRecords);
Dns.resolve4(dnsDomain, handleRecords);
Dns.resolve6(dnsDomain, handleRecords);
});
finishImmediately = true;
}
else {
var result = finish();
finishImmediately = true;
return result;
} // CheckDNS
};
isEmail.diagnoses = (function exportDiagnoses () {
var diag = {};
for (var key in internals.diagnoses) {
diag[key] = internals.diagnoses[key];
}
return diag;
})();
module.exports = isEmail;