similar.cjs 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. 'use strict';
  2. const emoji_regex_base = require('./base.cjs');
  3. const emoji_regex_numbers = require('./numbers.cjs');
  4. require('../convert.cjs');
  5. require('../data.cjs');
  /**
   * Scans a list of regex items for regex fragments that occur more than once,
   * either at the beginning ("start") or at the end ("end") of items.
   *
   * Each fragment is scored by its regex length multiplied by the number of
   * extra occurrences (the first occurrence scores 0 because one copy remains
   * after the merge). Only the highest-scoring fragments are returned.
   *
   * @param {Array} items Regex items of type "utf16", "optional" or "sequence".
   *   Items with any other type except "set" are ignored.
   * @returns {{score: number, sequences: Array}|undefined} Best score and every
   *   candidate that reaches it (`{ type, slices }`, where each slice is
   *   `{ index, slice }` and `slice` is "full" or a split position), or
   *   `undefined` when no fragment repeats.
   * @throws {Error} When an item has type "set".
   */
  function findSimilarRegexItemSequences(items) {
    // Dictionaries keyed by regex source; null prototype avoids inherited keys
    const prefixes = Object.create(null);
    const suffixes = Object.create(null);

    const register = (bucket, index, regex, slice) => {
      const known = bucket[regex];
      if (known) {
        // Every repeat after the first one saves `regex.length` characters
        known.score += regex.length;
        known.slices.push({ index, slice });
        return;
      }
      // First occurrence: nothing is saved yet, so the score starts at 0
      bucket[regex] = {
        score: 0,
        slices: [{ index, slice }]
      };
    };

    for (let position = 0; position < items.length; position += 1) {
      const current = items[position];
      const kind = current.type;

      if (kind === "set") {
        throw new Error("Unexpected set within a set");
      }
      if (kind !== "optional" && kind !== "utf16" && kind !== "sequence") {
        continue;
      }

      // The whole item counts as both a prefix and a suffix
      register(prefixes, position, current.regex, "full");
      register(suffixes, position, current.regex, "full");
      if (kind !== "sequence") {
        continue;
      }

      // Every split point of a sequence yields one prefix and one suffix
      const parts = current.items;
      for (let cut = 1; cut < parts.length; cut += 1) {
        const head = emoji_regex_base.createSequenceEmojiRegexItem(
          parts.slice(0, cut)
        );
        register(prefixes, position, head.regex, cut);
        const tail = emoji_regex_base.createSequenceEmojiRegexItem(
          parts.slice(cut)
        );
        register(suffixes, position, tail.regex, cut);
      }
    }

    let best;
    const collect = (bucket, type) => {
      for (const key of Object.keys(bucket)) {
        const { score, slices } = bucket[key];
        if (!score) {
          // Fragment occurs only once (or has an empty regex)
          continue;
        }
        const candidate = { type, slices };
        if (best && best.score === score) {
          // Tie with the current leader: keep both
          best.sequences.push(candidate);
        } else if (!best || best.score < score) {
          best = {
            score,
            sequences: [candidate]
          };
        }
      }
    };
    collect(prefixes, "start");
    collect(suffixes, "end");

    return best;
  }
  /**
   * Merges items that share a common start or end fragment into one sequence.
   *
   * The shared fragment is kept once; the parts that differ are collected into
   * a set (made optional when at least one item matched in full) and attached
   * after the fragment for "start" merges or before it for "end" merges.
   *
   * @param {Array} items Regex items; entries referenced by `merge.slices` are
   *   consumed, all others are passed through unchanged.
   * @param {{type: string, slices: Array}} merge Candidate to apply. `type` is
   *   "start" or "end"; each slice is `{ index, slice }` where `slice` is
   *   "full" or a numeric split position inside a sequence item.
   * @param {Function} [optimise] Optional callback applied to the set of
   *   differing parts when that set holds anything other than exactly one item.
   * @returns {Array} The merged sequence item followed by the untouched items.
   * @throws {Error} On a partial match against a non-sequence item, when no
   *   common fragment can be determined, or when a non-sequence item would
   *   have to provide a fragment longer than one item.
   */
  function mergeSimilarRegexItemSequences(items, merge, optimise) {
    const { type, slices } = merge;
    const fromStart = type === "start";

    const consumed = /* @__PURE__ */ new Set();
    const remainders = [];
    let includesWholeItem = false;
    let bestLength = 0;
    let bestIndex = -1;

    for (const { index, slice } of slices) {
      const item = items[index];
      const isSequence = item.type === "sequence";
      let matched;

      if (slice === "full") {
        // Entire item equals the common fragment, so the remainder is empty
        includesWholeItem = true;
        matched = isSequence ? item.items.length : 1;
      } else {
        if (!isSequence) {
          throw new Error(
            `Unexpected partial match for type "${item.type}"`
          );
        }
        const parts = item.items;
        matched = fromStart ? slice : parts.length - slice;
        // Keep whatever is left once the common fragment is cut away
        remainders.push(fromStart ? parts.slice(slice) : parts.slice(0, slice));
      }

      if (matched > bestLength) {
        bestIndex = index;
        bestLength = matched;
      }
      consumed.add(index);
    }

    if (bestLength < 1 || bestIndex < 0) {
      throw new Error("Cannot find common sequence");
    }

    // Take the common fragment from the item with the longest match
    const source = items[bestIndex];
    let sequence;
    if (source.type === "sequence") {
      const parts = source.items;
      sequence = fromStart
        ? parts.slice(0, bestLength)
        : parts.slice(parts.length - bestLength);
    } else {
      if (bestLength !== 1) {
        throw new Error(
          "Something went wrong. Cannot have long match in non-sequence"
        );
      }
      sequence = [source];
    }

    // Single-item remainders are used directly, longer ones become sequences
    const setItems = remainders.map(
      (list) => list.length === 1 ? list[0] : emoji_regex_base.createSequenceEmojiRegexItem(list)
    );
    const set = emoji_regex_base.createSetEmojiRegexItem(setItems);

    let mergedChunk;
    if (set.sets.length === 1) {
      // Do not run callback if only 1 item
      mergedChunk = set.sets[0];
    } else if (optimise) {
      // Run callback to optimise it
      mergedChunk = optimise(set);
    } else {
      // Use set as is
      mergedChunk = set;
    }

    if (includesWholeItem) {
      // At least one item had no remainder, so the differing part may be absent
      mergedChunk = emoji_regex_base.createOptionalEmojiRegexItem(mergedChunk);
    }

    if (fromStart) {
      sequence.push(mergedChunk);
    } else {
      sequence.unshift(mergedChunk);
    }

    const mergedItem = emoji_regex_base.createSequenceEmojiRegexItem(sequence);
    const untouched = items.filter((_, position) => !consumed.has(position));
    return [mergedItem, ...untouched];
  }
  /**
   * Repeatedly merges items of a set that share a common start or end until
   * no more candidates are found.
   *
   * The set is first passed through `optimiseNumbersSet`. After that, each
   * round asks `findSimilarRegexItemSequences` for the best candidates; when
   * several candidates tie, each one is tried on a fresh clone of the items
   * and the result with the shortest regex is kept (first one wins on ties).
   *
   * @param {object} set Set item to optimise.
   * @returns {object} The optimised item: either a set, or a single non-set
   *   item when everything collapsed into one.
   * @throws {Error} When a search result contains an empty candidates list.
   */
  function mergeSimilarItemsInSet(set) {
    let current = emoji_regex_numbers.optimiseNumbersSet(set);
    if (current.type !== "set") {
      return current;
    }

    // Applies one candidate to a clone of the current items, so that
    // competing candidates never see each other's changes
    const applyMerge = (candidate) => mergeSimilarRegexItemSequences(
      current.sets.map((entry) => emoji_regex_base.cloneEmojiRegexItem(entry, true)),
      candidate,
      mergeSimilarItemsInSet
    );

    for (;;) {
      const found = findSimilarRegexItemSequences(current.sets);
      if (!found) {
        // Nothing left to merge
        return current;
      }

      const { sequences } = found;
      if (sequences.length === 1) {
        const merged = applyMerge(sequences[0]);
        if (merged.length === 1) {
          return merged[0];
        }
        current = emoji_regex_base.createSetEmojiRegexItem(merged);
        continue;
      }

      // Several equally good candidates: try them all, keep the shortest regex
      let shortest;
      for (const candidate of sequences) {
        const merged = applyMerge(candidate);
        const option = merged.length === 1
          ? merged[0]
          : emoji_regex_base.createSetEmojiRegexItem(merged);
        if (!shortest || option.regex.length < shortest.regex.length) {
          shortest = option;
        }
      }

      if (!shortest) {
        throw new Error("Empty sequences list");
      }
      if (shortest.type !== "set") {
        return shortest;
      }
      current = shortest;
    }
  }
  // Public API of this module: the three functions defined above, each attached
  // to `exports` with its own assignment.
  215. exports.findSimilarRegexItemSequences = findSimilarRegexItemSequences;
  216. exports.mergeSimilarItemsInSet = mergeSimilarItemsInSet;
  217. exports.mergeSimilarRegexItemSequences = mergeSimilarRegexItemSequences;