base.cjs 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. 'use strict';
  /**
   * Converts a single UTF-16 code unit to regular expression source text.
   *
   * - Digits, ASCII letters, `_` and the backtick are returned unchanged.
   * - Any other printable ASCII character (33-126) is backslash-escaped.
   * - Remaining values below 255 are written as `\xHH`.
   * - Everything else is written as `\uHHHH`, zero-padded to four hex digits.
   *
   * @param {number} number - Code unit to convert.
   * @returns {string} Escaped regex source for that code unit.
   */
  function toString(number) {
    if (number < 255) {
      if (number > 32 && number < 127) {
        const char = String.fromCharCode(number);
        const isDigit = number > 47 && number < 58;
        const isUpperCase = number > 64 && number < 91;
        // 95-122 covers "_", "`" and a-z
        const isLowerCaseLike = number > 94 && number < 123;
        return isDigit || isUpperCase || isLowerCaseLike ? char : `\\${char}`;
      }
      return `\\x${number.toString(16).toUpperCase().padStart(2, "0")}`;
    }
    // "\u" must be followed by exactly four hex digits; without padding,
    // values such as 255 or 0x100 would yield "\uFF" / "\u100", which do
    // not match the intended character.
    return `\\u${number.toString(16).toUpperCase().padStart(4, "0")}`;
  }
  /**
   * Wraps regex source text in a non-capturing group.
   *
   * @param {string} regex - Regex source to wrap.
   * @returns {string} The source surrounded by `(?:` and `)`.
   */
  function wrapRegexInGroup(regex) {
    return `(?:${regex})`;
  }
  /**
   * Rebuilds `item.regex` from `item.numbers` and returns the new regex.
   *
   * A single number becomes its escaped form. Otherwise `item.numbers` is
   * sorted in place (ascending) and turned into a character class: runs of
   * three or more consecutive values are written as `start-last`, shorter
   * runs are listed value by value, and repeated values are ignored.
   *
   * @param {object} item - Item with a `numbers` array; its `regex` is overwritten.
   * @returns {string} The regex stored in `item.regex`.
   * @throws {Error} When no characters were produced (empty `numbers`).
   */
  function updateUTF16EmojiRegexItem(item) {
    const { numbers } = item;

    if (numbers.length === 1) {
      const single = toString(numbers[0]);
      item.regex = single;
      return single;
    }

    numbers.sort((a, b) => a - b);

    // First pass: collapse the sorted values into runs of consecutive numbers
    const runs = [];
    for (const value of numbers) {
      const current = runs[runs.length - 1];
      if (current) {
        if (current.last === value) {
          // Duplicate of the previous value
          continue;
        }
        if (current.last === value - 1) {
          current.last = value;
          continue;
        }
      }
      runs.push({ start: value, last: value });
    }

    // Second pass: render each run
    const chars = [];
    for (const { start, last } of runs) {
      if (last > start + 1) {
        chars.push(`${toString(start)}-${toString(last)}`);
        continue;
      }
      chars.push(toString(start));
      if (last > start) {
        // Run of exactly two values: list both instead of using a range
        chars.push(toString(last));
      }
    }

    if (!chars.length) {
      throw new Error("Unexpected empty range");
    }

    const regex = `[${chars.join("")}]`;
    item.regex = regex;
    return regex;
  }
  /**
   * Creates a "utf16" regex item for the given numbers.
   *
   * The array is stored on the item as-is (not copied) and the regex is
   * generated immediately via `updateUTF16EmojiRegexItem`.
   *
   * @param {number[]} numbers - Values the item should match.
   * @returns {object} New item with `type`, `regex`, `numbers`, `length` and `group`.
   */
  function createUTF16EmojiRegexItem(numbers) {
    const item = {
      type: "utf16",
      regex: "",
      numbers,
      length: 1,
      group: true,
    };

    updateUTF16EmojiRegexItem(item);

    return item;
  }
  /**
   * Rebuilds `item.regex` by concatenating the regexes of `item.items`.
   *
   * Child items of type "set" whose `group` flag is falsy are wrapped in a
   * non-capturing group first; all other children are used verbatim.
   *
   * @param {object} item - Sequence item; its `regex` is overwritten.
   * @returns {string} The regex stored in `item.regex`.
   */
  function updateSequenceEmojiRegexItem(item) {
    const parts = item.items.map((child) => {
      const needsGroup = child.type === "set" && !child.group;
      return needsGroup ? wrapRegexInGroup(child.regex) : child.regex;
    });

    const regex = parts.join("");
    item.regex = regex;
    return regex;
  }
  /**
   * Creates a "sequence" item from a list of items.
   *
   * Nested "sequence" entries are flattened one level: their `items` are
   * spliced into the result instead of the entry itself. The item length is
   * the sum of the child lengths.
   *
   * When `sequence` holds exactly one entry, the result inherits that
   * entry's `group` flag and, unless the entry is "optional", its `numbers`.
   * An explicit `numbers` argument always takes precedence.
   *
   * @param {object[]} sequence - Items to put in the sequence.
   * @param {number[]} [numbers] - Optional numbers to attach to the result.
   * @returns {object} New sequence item with its regex generated.
   * @throws {Error} When the flattened list of items is empty.
   */
  function createSequenceEmojiRegexItem(sequence, numbers) {
    const items = sequence.reduce(
      (flat, entry) =>
        flat.concat(entry.type === "sequence" ? entry.items : [entry]),
      []
    );

    if (!items.length) {
      throw new Error("Empty sequence");
    }

    let totalLength = 0;
    for (const entry of items) {
      totalLength = entry.length + totalLength;
    }

    const result = {
      type: "sequence",
      items,
      regex: "",
      length: totalLength,
      group: false,
    };

    if (sequence.length === 1) {
      const [only] = sequence;
      result.group = only.group;
      if (only.type !== "optional" && only.numbers) {
        result.numbers = only.numbers;
      }
    }

    // Explicit numbers override anything inherited from a single entry
    if (numbers) {
      result.numbers = numbers;
    }

    updateSequenceEmojiRegexItem(result);
    return result;
  }
  /**
   * Rebuilds `item.regex` and `item.group` from `item.sets`.
   *
   * A set with exactly one member copies that member's `group` flag and
   * regex. Any other set is marked as not grouped and its members' regexes
   * are joined with `|`.
   *
   * @param {object} item - Set item; its `regex` and `group` are overwritten.
   * @returns {string} The regex stored in `item.regex`.
   */
  function updateSetEmojiRegexItem(item) {
    const { sets } = item;

    if (sets.length !== 1) {
      const alternatives = sets.map(({ regex }) => regex).join("|");
      item.group = false;
      item.regex = alternatives;
      return alternatives;
    }

    const [only] = sets;
    const { regex } = only;
    item.group = only.group;
    item.regex = regex;
    return regex;
  }
  /**
   * Creates a "set" item (alternation) from a list of items.
   *
   * Nested "set" entries are flattened one level. Members are ordered by
   * `length` descending, with ties broken by `regex.localeCompare`. The
   * resulting `length` is the smallest member length, where a running value
   * of 0 is treated as "not set yet".
   *
   * `numbers` is attached only when every entry of `set` is non-optional and
   * has `numbers`; it is then the concatenation of all of them.
   *
   * @param {object[]} set - Items to combine.
   * @returns {object} New set item with its regex generated.
   */
  function createSetEmojiRegexItem(set) {
    const sets = set.reduce(
      (flat, entry) => flat.concat(entry.type === "set" ? entry.sets : [entry]),
      []
    );

    // Combined numbers are only meaningful if every entry provides its own
    const allHaveNumbers = set.every(
      (entry) => entry.type !== "optional" && entry.numbers
    );
    const numbers = allHaveNumbers
      ? set.reduce((all, entry) => [...all, ...entry.numbers], [])
      : null;

    sets.sort((a, b) =>
      a.length === b.length
        ? a.regex.localeCompare(b.regex)
        : b.length - a.length
    );

    let shortest = 0;
    for (const entry of sets) {
      shortest = shortest ? Math.min(shortest, entry.length) : entry.length;
    }

    const result = {
      type: "set",
      sets,
      regex: "",
      length: shortest,
      group: false,
    };

    if (numbers) {
      result.numbers = numbers;
    }

    if (set.length === 1) {
      const [only] = set;
      result.group = only.group;
    }

    updateSetEmojiRegexItem(result);
    return result;
  }
  /**
   * Rebuilds `item.regex` for an "optional" item.
   *
   * The child's regex is used directly when the child's `group` flag is
   * truthy, otherwise it is wrapped in a non-capturing group; `?` is then
   * appended.
   *
   * @param {object} item - Optional item with a child in `item.item`; its `regex` is overwritten.
   * @returns {string} The regex stored in `item.regex`.
   */
  function updateOptionalEmojiRegexItem(item) {
    const { group, regex: inner } = item.item;

    let base;
    if (group) {
      base = inner;
    } else {
      base = wrapRegexInGroup(inner);
    }

    const regex = base + "?";
    item.regex = regex;
    return regex;
  }
  /**
   * Wraps an item in an "optional" item.
   *
   * An item that is already "optional" is returned as-is (same object).
   * Otherwise a new item is created that references `item`, copies its
   * `length`, is marked as grouped, and has its regex generated.
   *
   * @param {object} item - Item to make optional.
   * @returns {object} `item` itself or a new optional item wrapping it.
   */
  function createOptionalEmojiRegexItem(item) {
    const isAlreadyOptional = item.type === "optional";

    if (!isAlreadyOptional) {
      const wrapper = {
        type: "optional",
        item,
        regex: "",
        length: item.length,
        group: true,
      };
      updateOptionalEmojiRegexItem(wrapper);
      return wrapper;
    }

    return item;
  }
  /**
   * Creates a copy of a regex item.
   *
   * The item's own properties are copied, and `numbers` (when present on a
   * non-optional item) is copied into a new array. Child handling depends on
   * `shallow`:
   * - shallow: "sequence"/"set" get a new child array holding the same
   *   child objects; "optional" keeps referencing the same child.
   * - deep (default): all children are cloned recursively.
   *
   * @param {object} item - Item to clone.
   * @param {boolean} [shallow=false] - Whether to reuse child items.
   * @returns {object} The cloned item.
   */
  function cloneEmojiRegexItem(item, shallow = false) {
    const copy = { ...item };
    const { type } = copy;

    if (type !== "optional" && copy.numbers) {
      copy.numbers = [...copy.numbers];
    }

    // Once recursion starts, nested items are always cloned deeply
    const cloneChildren = (children) =>
      shallow
        ? [...children]
        : children.map((child) => cloneEmojiRegexItem(child, false));

    if (type === "sequence") {
      copy.items = cloneChildren(copy.items);
    } else if (type === "set") {
      copy.sets = cloneChildren(copy.sets);
    } else if (type === "optional" && !shallow) {
      copy.item = cloneEmojiRegexItem(copy.item, false);
    }

    return copy;
  }
  // Public API of this CommonJS module. Each export is a separate
  // `module.exports.<name> = <name>` assignment, in the same order as before.
  module.exports.cloneEmojiRegexItem = cloneEmojiRegexItem;
  module.exports.createOptionalEmojiRegexItem = createOptionalEmojiRegexItem;
  module.exports.createSequenceEmojiRegexItem = createSequenceEmojiRegexItem;
  module.exports.createSetEmojiRegexItem = createSetEmojiRegexItem;
  module.exports.createUTF16EmojiRegexItem = createUTF16EmojiRegexItem;
  module.exports.updateOptionalEmojiRegexItem = updateOptionalEmojiRegexItem;
  module.exports.updateSequenceEmojiRegexItem = updateSequenceEmojiRegexItem;
  module.exports.updateSetEmojiRegexItem = updateSetEmojiRegexItem;
  module.exports.updateUTF16EmojiRegexItem = updateUTF16EmojiRegexItem;
  module.exports.wrapRegexInGroup = wrapRegexInGroup;