base.mjs 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  /**
   * Converts a single character code into text that can be embedded in a
   * regular expression.
   *
   * - Codes 33-126 that are digits, `A-Z`, `_`, a backtick or `a-z` are
   *   returned as the character itself; other codes in that range are
   *   returned as the character prefixed with a backslash.
   * - Remaining codes up to 0xFF are written as `\xHH` (two hex digits).
   * - Larger codes are written as `\uHHHH`, zero-padded to at least four hex
   *   digits so the escape stays well-formed for values below 0x1000.
   *
   * @param {number} number - Character code to convert.
   * @returns {string} Regular expression fragment for the code.
   */
  function toString(number) {
    if (number <= 0xff) {
      if (number > 32 && number < 127) {
        const char = String.fromCharCode(number);
        const isDigit = number > 47 && number < 58;
        const isUpperCase = number > 64 && number < 91;
        // 95-122 covers "_", "`" and a-z
        const isLowerCaseOrUnderscore = number > 94 && number < 123;
        if (isDigit || isUpperCase || isLowerCaseOrUnderscore) {
          return char;
        }
        return `\\${char}`;
      }
      const hexByte = number.toString(16).toUpperCase().padStart(2, "0");
      return `\\x${hexByte}`;
    }
    // "\u" must be followed by exactly four hex digits, so pad short values.
    const hexUnit = number.toString(16).toUpperCase().padStart(4, "0");
    return `\\u${hexUnit}`;
  }
  /**
   * Wraps a regular expression fragment in a non-capturing group.
   *
   * @param {string} regex - Regular expression fragment.
   * @returns {string} The fragment enclosed in `(?:` and `)`.
   */
  function wrapRegexInGroup(regex) {
    return `(?:${regex})`;
  }
  /**
   * Rebuilds `item.regex` from `item.numbers` and returns the new value.
   *
   * A single number becomes its plain converted form. Otherwise the numbers
   * are sorted in place (ascending), duplicates are skipped, and the result is
   * a character class: runs of three or more consecutive values are written
   * as `start-last`, shorter runs are listed value by value.
   *
   * @param {object} item - Item with a `numbers` array; its `regex` is overwritten.
   * @returns {string} The generated regular expression.
   * @throws {Error} When no character could be generated (empty `numbers`).
   */
  function updateUTF16EmojiRegexItem(item) {
    const codes = item.numbers;
    if (codes.length === 1) {
      const single = toString(codes[0]);
      item.regex = single;
      return single;
    }

    codes.sort((left, right) => left - right);

    const parts = [];
    // Unique consecutive values collected for the run currently being built.
    let run = [];

    const flushRun = () => {
      if (run.length === 0) {
        return;
      }
      const first = run[0];
      const final = run[run.length - 1];
      const pending = run;
      run = [];
      if (final > first + 1) {
        parts.push(`${toString(first)}-${toString(final)}`);
        return;
      }
      for (const code of pending) {
        parts.push(toString(code));
      }
    };

    for (const code of codes) {
      if (run.length > 0) {
        const previous = run[run.length - 1];
        if (previous === code) {
          // Duplicate of the last value in the run
          continue;
        }
        if (previous === code - 1) {
          run.push(code);
          continue;
        }
      }
      flushRun();
      run = [code];
    }
    flushRun();

    if (parts.length === 0) {
      throw new Error("Unexpected empty range");
    }

    const regex = `[${parts.join("")}]`;
    item.regex = regex;
    return regex;
  }
  /**
   * Creates a "utf16" regex item for the given numbers.
   *
   * The item stores the passed array as-is (no copy), has a length of 1 and is
   * marked as a group; its `regex` is filled in by
   * `updateUTF16EmojiRegexItem()`.
   *
   * @param {number[]} numbers - Values the item should match.
   * @returns {object} The new item.
   */
  function createUTF16EmojiRegexItem(numbers) {
    const utf16Item = { type: "utf16", regex: "", numbers, length: 1, group: true };
    updateUTF16EmojiRegexItem(utf16Item);
    return utf16Item;
  }
  /**
   * Rebuilds `item.regex` by concatenating the regexes of `item.items`.
   *
   * Children of type "set" that are not flagged as a group are wrapped with
   * `wrapRegexInGroup()` first; all other children are used unchanged.
   *
   * @param {object} item - Sequence item; its `regex` is overwritten.
   * @returns {string} The generated regular expression.
   */
  function updateSequenceEmojiRegexItem(item) {
    const parts = [];
    for (const child of item.items) {
      const needsWrapping = child.type === "set" && !child.group;
      parts.push(needsWrapping ? wrapRegexInGroup(child.regex) : child.regex);
    }
    const regex = parts.join("");
    item.regex = regex;
    return regex;
  }
  /**
   * Creates a "sequence" regex item from a list of items.
   *
   * Nested sequences are flattened one level by splicing in their `items`.
   * The length of the result is the sum of the child lengths. When the input
   * holds exactly one item, its `group` flag is inherited and, unless that
   * item is optional, so are its `numbers`. An explicit `numbers` argument
   * takes precedence over inherited numbers.
   *
   * @param {object[]} sequence - Items to join.
   * @param {number[]} [numbers] - Optional numbers to store on the result.
   * @returns {object} The new sequence item.
   * @throws {Error} When the flattened list of items is empty.
   */
  function createSequenceEmojiRegexItem(sequence, numbers) {
    const items = sequence.flatMap(
      (entry) => entry.type === "sequence" ? entry.items : [entry]
    );
    if (items.length === 0) {
      throw new Error("Empty sequence");
    }

    let totalLength = 0;
    for (const entry of items) {
      totalLength = entry.length + totalLength;
    }

    const isSingle = sequence.length === 1;
    const onlyItem = isSingle ? sequence[0] : void 0;

    const result = {
      type: "sequence",
      items,
      regex: "",
      length: totalLength,
      group: isSingle ? onlyItem.group : false
    };

    // Explicit numbers win; otherwise fall back to the single non-optional child.
    const inherited = isSingle && onlyItem.type !== "optional" ? onlyItem.numbers : void 0;
    const chosenNumbers = numbers || inherited;
    if (chosenNumbers) {
      result.numbers = chosenNumbers;
    }

    updateSequenceEmojiRegexItem(result);
    return result;
  }
  /**
   * Rebuilds `item.regex` and `item.group` from `item.sets`.
   *
   * A set with exactly one child copies that child's regex and group flag.
   * Otherwise the child regexes are joined with `|` and the set is marked as
   * not being a group.
   *
   * @param {object} item - Set item; its `regex` and `group` are overwritten.
   * @returns {string} The generated regular expression.
   */
  function updateSetEmojiRegexItem(item) {
    const { sets } = item;
    const hasSingleChild = sets.length === 1;

    if (!hasSingleChild) {
      item.group = false;
      const alternatives = sets.map(({ regex }) => regex).join("|");
      item.regex = alternatives;
      return alternatives;
    }

    const [onlyChild] = sets;
    item.group = onlyChild.group;
    const childRegex = onlyChild.regex;
    item.regex = childRegex;
    return childRegex;
  }
  /**
   * Creates a "set" regex item (a list of alternatives) from a list of items.
   *
   * Nested sets are flattened one level by splicing in their `sets`. The
   * children are ordered by length, longest first, with ties broken by
   * `localeCompare()` on their regexes. The length of the result is the
   * smallest child length. `numbers` is stored only when every input item is
   * non-optional and has `numbers`; it is then the concatenation of all of
   * them in input order.
   *
   * @param {object[]} set - Items to combine.
   * @returns {object} The new set item.
   */
  function createSetEmojiRegexItem(set) {
    const sets = set.flatMap(
      (entry) => entry.type === "set" ? entry.sets : [entry]
    );

    // Collect numbers until an item that cannot contribute any is found.
    let numbers = [];
    for (const entry of set) {
      if (entry.type === "optional" || !entry.numbers) {
        numbers = null;
        break;
      }
      for (const code of entry.numbers) {
        numbers.push(code);
      }
    }

    sets.sort(
      (a, b) => a.length === b.length ? a.regex.localeCompare(b.regex) : b.length - a.length
    );

    let shortest = 0;
    for (const entry of sets) {
      shortest = shortest ? Math.min(shortest, entry.length) : entry.length;
    }

    const result = {
      type: "set",
      sets,
      regex: "",
      length: shortest,
      group: false
    };
    if (numbers) {
      result.numbers = numbers;
    }
    if (set.length === 1) {
      result.group = set[0].group;
    }

    updateSetEmojiRegexItem(result);
    return result;
  }
  /**
   * Rebuilds `item.regex` for an optional item: the regex of the wrapped
   * child followed by `?`.
   *
   * The child regex is passed through `wrapRegexInGroup()` first unless the
   * child is already flagged as a group.
   *
   * @param {object} item - Optional item wrapping `item.item`; its `regex` is overwritten.
   * @returns {string} The generated regular expression.
   */
  function updateOptionalEmojiRegexItem(item) {
    const child = item.item;
    let base;
    if (child.group) {
      base = child.regex;
    } else {
      base = wrapRegexInGroup(child.regex);
    }
    const optionalRegex = `${base}?`;
    item.regex = optionalRegex;
    return optionalRegex;
  }
  /**
   * Wraps an item in an "optional" regex item.
   *
   * An item that is already optional is returned unchanged. Otherwise the new
   * item keeps the wrapped item's length, is marked as a group, and gets its
   * `regex` from `updateOptionalEmojiRegexItem()`.
   *
   * @param {object} item - Item to make optional.
   * @returns {object} The optional item.
   */
  function createOptionalEmojiRegexItem(item) {
    const isAlreadyOptional = item.type === "optional";
    if (!isAlreadyOptional) {
      const wrapper = {
        type: "optional",
        item,
        regex: "",
        length: item.length,
        group: true
      };
      updateOptionalEmojiRegexItem(wrapper);
      return wrapper;
    }
    return item;
  }
  /**
   * Creates a copy of a regex item.
   *
   * The item's own properties are copied, and `numbers` (when present on a
   * non-optional item) is copied into a new array. Children are handled by
   * type:
   * - "sequence" / "set": the `items` / `sets` array is always a new array;
   *   its entries are cloned deeply unless `shallow` is set, in which case
   *   the same child objects are reused.
   * - "optional": the wrapped `item` is cloned deeply unless `shallow` is set.
   *
   * @param {object} item - Item to clone.
   * @param {boolean} [shallow=false] - Reuse child items instead of cloning them.
   * @returns {object} The copy.
   */
  function cloneEmojiRegexItem(item, shallow = false) {
    const copy = { ...item };
    const deepCloneChild = (child) => cloneEmojiRegexItem(child, false);
    const copyChildren = (children) => shallow ? [...children] : children.map(deepCloneChild);

    if (copy.type !== "optional" && copy.numbers) {
      copy.numbers = [...copy.numbers];
    }

    if (copy.type === "sequence") {
      copy.items = copyChildren(copy.items);
    } else if (copy.type === "set") {
      copy.sets = copyChildren(copy.sets);
    } else if (copy.type === "optional" && !shallow) {
      copy.item = deepCloneChild(copy.item);
    }

    return copy;
  }
  230. export { cloneEmojiRegexItem, createOptionalEmojiRegexItem, createSequenceEmojiRegexItem, createSetEmojiRegexItem, createUTF16EmojiRegexItem, updateOptionalEmojiRegexItem, updateSequenceEmojiRegexItem, updateSetEmojiRegexItem, updateUTF16EmojiRegexItem, wrapRegexInGroup };