You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

234 lines
6.0 KiB

8 years ago
  1. /*! https://mths.be/wtf8 v1.0.0 by @mathias */
  2. ;(function(root) {
  3. // Detect free variables `exports`
  4. var freeExports = typeof exports == 'object' && exports;
  5. // Detect free variable `module`
  6. var freeModule = typeof module == 'object' && module &&
  7. module.exports == freeExports && module;
  8. // Detect free variable `global`, from Node.js or Browserified code,
  9. // and use it as `root`
  10. var freeGlobal = typeof global == 'object' && global;
  11. if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
  12. root = freeGlobal;
  13. }
  14. /*--------------------------------------------------------------------------*/
  15. var stringFromCharCode = String.fromCharCode;
  16. // Taken from https://mths.be/punycode
  17. function ucs2decode(string) {
  18. var output = [];
  19. var counter = 0;
  20. var length = string.length;
  21. var value;
  22. var extra;
  23. while (counter < length) {
  24. value = string.charCodeAt(counter++);
  25. if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
  26. // high surrogate, and there is a next character
  27. extra = string.charCodeAt(counter++);
  28. if ((extra & 0xFC00) == 0xDC00) { // low surrogate
  29. output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
  30. } else {
  31. // unmatched surrogate; only append this code unit, in case the next
  32. // code unit is the high surrogate of a surrogate pair
  33. output.push(value);
  34. counter--;
  35. }
  36. } else {
  37. output.push(value);
  38. }
  39. }
  40. return output;
  41. }
  42. // Taken from https://mths.be/punycode
  43. function ucs2encode(array) {
  44. var length = array.length;
  45. var index = -1;
  46. var value;
  47. var output = '';
  48. while (++index < length) {
  49. value = array[index];
  50. if (value > 0xFFFF) {
  51. value -= 0x10000;
  52. output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
  53. value = 0xDC00 | value & 0x3FF;
  54. }
  55. output += stringFromCharCode(value);
  56. }
  57. return output;
  58. }
  59. /*--------------------------------------------------------------------------*/
  60. function createByte(codePoint, shift) {
  61. return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80);
  62. }
  63. function encodeCodePoint(codePoint) {
  64. if ((codePoint & 0xFFFFFF80) == 0) { // 1-byte sequence
  65. return stringFromCharCode(codePoint);
  66. }
  67. var symbol = '';
  68. if ((codePoint & 0xFFFFF800) == 0) { // 2-byte sequence
  69. symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
  70. }
  71. else if ((codePoint & 0xFFFF0000) == 0) { // 3-byte sequence
  72. symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
  73. symbol += createByte(codePoint, 6);
  74. }
  75. else if ((codePoint & 0xFFE00000) == 0) { // 4-byte sequence
  76. symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0);
  77. symbol += createByte(codePoint, 12);
  78. symbol += createByte(codePoint, 6);
  79. }
  80. symbol += stringFromCharCode((codePoint & 0x3F) | 0x80);
  81. return symbol;
  82. }
  83. function wtf8encode(string) {
  84. var codePoints = ucs2decode(string);
  85. var length = codePoints.length;
  86. var index = -1;
  87. var codePoint;
  88. var byteString = '';
  89. while (++index < length) {
  90. codePoint = codePoints[index];
  91. byteString += encodeCodePoint(codePoint);
  92. }
  93. return byteString;
  94. }
  95. /*--------------------------------------------------------------------------*/
  96. function readContinuationByte() {
  97. if (byteIndex >= byteCount) {
  98. throw Error('Invalid byte index');
  99. }
  100. var continuationByte = byteArray[byteIndex] & 0xFF;
  101. byteIndex++;
  102. if ((continuationByte & 0xC0) == 0x80) {
  103. return continuationByte & 0x3F;
  104. }
  105. // If we end up here, it’s not a continuation byte.
  106. throw Error('Invalid continuation byte');
  107. }
  108. function decodeSymbol() {
  109. var byte1;
  110. var byte2;
  111. var byte3;
  112. var byte4;
  113. var codePoint;
  114. if (byteIndex > byteCount) {
  115. throw Error('Invalid byte index');
  116. }
  117. if (byteIndex == byteCount) {
  118. return false;
  119. }
  120. // Read the first byte.
  121. byte1 = byteArray[byteIndex] & 0xFF;
  122. byteIndex++;
  123. // 1-byte sequence (no continuation bytes)
  124. if ((byte1 & 0x80) == 0) {
  125. return byte1;
  126. }
  127. // 2-byte sequence
  128. if ((byte1 & 0xE0) == 0xC0) {
  129. var byte2 = readContinuationByte();
  130. codePoint = ((byte1 & 0x1F) << 6) | byte2;
  131. if (codePoint >= 0x80) {
  132. return codePoint;
  133. } else {
  134. throw Error('Invalid continuation byte');
  135. }
  136. }
  137. // 3-byte sequence (may include unpaired surrogates)
  138. if ((byte1 & 0xF0) == 0xE0) {
  139. byte2 = readContinuationByte();
  140. byte3 = readContinuationByte();
  141. codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
  142. if (codePoint >= 0x0800) {
  143. return codePoint;
  144. } else {
  145. throw Error('Invalid continuation byte');
  146. }
  147. }
  148. // 4-byte sequence
  149. if ((byte1 & 0xF8) == 0xF0) {
  150. byte2 = readContinuationByte();
  151. byte3 = readContinuationByte();
  152. byte4 = readContinuationByte();
  153. codePoint = ((byte1 & 0x0F) << 0x12) | (byte2 << 0x0C) |
  154. (byte3 << 0x06) | byte4;
  155. if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
  156. return codePoint;
  157. }
  158. }
  159. throw Error('Invalid WTF-8 detected');
  160. }
  161. var byteArray;
  162. var byteCount;
  163. var byteIndex;
  164. function wtf8decode(byteString) {
  165. byteArray = ucs2decode(byteString);
  166. byteCount = byteArray.length;
  167. byteIndex = 0;
  168. var codePoints = [];
  169. var tmp;
  170. while ((tmp = decodeSymbol()) !== false) {
  171. codePoints.push(tmp);
  172. }
  173. return ucs2encode(codePoints);
  174. }
  175. /*--------------------------------------------------------------------------*/
  176. var wtf8 = {
  177. 'version': '1.0.0',
  178. 'encode': wtf8encode,
  179. 'decode': wtf8decode
  180. };
  181. // Some AMD build optimizers, like r.js, check for specific condition patterns
  182. // like the following:
  183. if (
  184. typeof define == 'function' &&
  185. typeof define.amd == 'object' &&
  186. define.amd
  187. ) {
  188. define(function() {
  189. return wtf8;
  190. });
  191. } else if (freeExports && !freeExports.nodeType) {
  192. if (freeModule) { // in Node.js or RingoJS v0.8.0+
  193. freeModule.exports = wtf8;
  194. } else { // in Narwhal or RingoJS v0.7.0-
  195. var object = {};
  196. var hasOwnProperty = object.hasOwnProperty;
  197. for (var key in wtf8) {
  198. hasOwnProperty.call(wtf8, key) && (freeExports[key] = wtf8[key]);
  199. }
  200. }
  201. } else { // in Rhino or a web browser
  202. root.wtf8 = wtf8;
  203. }
  204. }(this));