/*! http://mths.be/utf8js v2.0.0 by @mathias */ ;(function(root) { // Detect free variables `exports` var freeExports = typeof exports == 'object' && exports; // Detect free variable `module` var freeModule = typeof module == 'object' && module && module.exports == freeExports && module; // Detect free variable `global`, from Node.js or Browserified code, // and use it as `root` var freeGlobal = typeof global == 'object' && global; if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) { root = freeGlobal; } /*--------------------------------------------------------------------------*/ var stringFromCharCode = String.fromCharCode; // Taken from http://mths.be/punycode function ucs2decode(string) { var output = []; var counter = 0; var length = string.length; var value; var extra; while (counter < length) { value = string.charCodeAt(counter++); if (value >= 0xD800 && value <= 0xDBFF && counter < length) { // high surrogate, and there is a next character extra = string.charCodeAt(counter++); if ((extra & 0xFC00) == 0xDC00) { // low surrogate output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); } else { // unmatched surrogate; only append this code unit, in case the next // code unit is the high surrogate of a surrogate pair output.push(value); counter--; } } else { output.push(value); } } return output; } // Taken from http://mths.be/punycode function ucs2encode(array) { var length = array.length; var index = -1; var value; var output = ''; while (++index < length) { value = array[index]; if (value > 0xFFFF) { value -= 0x10000; output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); value = 0xDC00 | value & 0x3FF; } output += stringFromCharCode(value); } return output; } /*--------------------------------------------------------------------------*/ function createByte(codePoint, shift) { return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80); } function encodeCodePoint(codePoint) { if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence return stringFromCharCode(codePoint); } var symbol = ''; if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0); } else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0); symbol += createByte(codePoint, 6); } else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0); symbol += createByte(codePoint, 12); symbol += createByte(codePoint, 6); } symbol += stringFromCharCode((codePoint & 0x3F) | 0x80); return symbol; } function utf8encode(string) { var codePoints = ucs2decode(string); // console.log(JSON.stringify(codePoints.map(function(x) { // return 'U+' + x.toString(16).toUpperCase(); // }))); var length = codePoints.length; var index = -1; var codePoint; var byteString = ''; while (++index < length) { codePoint = codePoints[index]; byteString += encodeCodePoint(codePoint); } return byteString; } /*--------------------------------------------------------------------------*/ function readContinuationByte() { if (byteIndex >= byteCount) { throw Error('Invalid byte index'); } var continuationByte = byteArray[byteIndex] & 0xFF; byteIndex++; if ((continuationByte & 0xC0) == 0x80) { return continuationByte & 0x3F; } // If we end up here, it’s not a continuation byte throw Error('Invalid continuation byte'); } function decodeSymbol() { var byte1; var byte2; var byte3; var byte4; var codePoint; if (byteIndex > byteCount) { throw Error('Invalid byte index'); } if (byteIndex == byteCount) { return false; } // Read first byte byte1 = byteArray[byteIndex] & 0xFF; byteIndex++; // 1-byte sequence (no continuation bytes) if ((byte1 & 0x80) == 0) { return byte1; } // 2-byte sequence if ((byte1 & 0xE0) == 0xC0) { var byte2 = readContinuationByte(); codePoint = ((byte1 & 0x1F) << 6) | byte2; if (codePoint >= 0x80) { return codePoint; } else { throw Error('Invalid continuation byte'); } } // 3-byte sequence (may include unpaired surrogates) if ((byte1 & 0xF0) == 0xE0) { byte2 = readContinuationByte(); byte3 = readContinuationByte(); codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3; if (codePoint >= 0x0800) { return codePoint; } else { throw Error('Invalid continuation byte'); } } // 4-byte sequence if ((byte1 & 0xF8) == 0xF0) { byte2 = readContinuationByte(); byte3 = readContinuationByte(); byte4 = readContinuationByte(); codePoint = ((byte1 & 0x0F) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4; if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) { return codePoint; } } throw Error('Invalid UTF-8 detected'); } var byteArray; var byteCount; var byteIndex; function utf8decode(byteString) { byteArray = ucs2decode(byteString); byteCount = byteArray.length; byteIndex = 0; var codePoints = []; var tmp; while ((tmp = decodeSymbol()) !== false) { codePoints.push(tmp); } return ucs2encode(codePoints); } /*--------------------------------------------------------------------------*/ var utf8 = { 'version': '2.0.0', 'encode': utf8encode, 'decode': utf8decode }; // Some AMD build optimizers, like r.js, check for specific condition patterns // like the following: if ( typeof define == 'function' && typeof define.amd == 'object' && define.amd ) { define(function() { return utf8; }); } else if (freeExports && !freeExports.nodeType) { if (freeModule) { // in Node.js or RingoJS v0.8.0+ freeModule.exports = utf8; } else { // in Narwhal or RingoJS v0.7.0- var object = {}; var hasOwnProperty = object.hasOwnProperty; for (var key in utf8) { hasOwnProperty.call(utf8, key) && (freeExports[key] = utf8[key]); } } } else { // in Rhino or a web browser root.utf8 = utf8; } }(this));