similar.mjs 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import { createSequenceEmojiRegexItem, createSetEmojiRegexItem, createOptionalEmojiRegexItem, cloneEmojiRegexItem } from './base.mjs';
  2. import { optimiseNumbersSet } from './numbers.mjs';
  3. import '../convert.mjs';
  4. import '../data.mjs';
  /**
   * Look for the regex that is shared, as a beginning or as an ending, by
   * several items of a list and report the best-scoring match(es).
   *
   * Every "optional", "utf16" and "sequence" item is recorded under its full
   * regex. A "sequence" item is additionally recorded under the regex of every
   * proper leading and trailing run of its child items. The first time a regex
   * is seen it scores 0; each repeat adds the length of that regex.
   *
   * @param {Array<object>} items - Regex items; each has `type` and `regex`,
   *   and "sequence" items also have `items`.
   * @returns {{score: number, sequences: Array<{type: string, slices: Array<{index: number, slice: (number|string)}>}>}|undefined}
   *   Highest score with every "start"/"end" candidate that reached it, or
   *   `undefined` when no candidate has a non-zero score.
   * @throws {Error} If an item has type "set".
   */
  function findSimilarRegexItemSequences(items) {
    // Null-prototype dictionaries keyed by regex string
    const prefixes = /* @__PURE__ */ Object.create(null);
    const suffixes = /* @__PURE__ */ Object.create(null);

    // Register one occurrence of `regex` for the item at `index`
    const record = (bucket, index, regex, slice) => {
      const entry = bucket[regex];
      if (entry) {
        entry.score += regex.length;
        entry.slices.push({ index, slice });
        return;
      }
      // Start with 0. One item will remain after replacement
      bucket[regex] = {
        score: 0,
        slices: [{ index, slice }]
      };
    };

    for (const [index, baseItem] of items.entries()) {
      const kind = baseItem.type;
      if (kind === "set") {
        throw new Error("Unexpected set within a set");
      }
      const isSequence = kind === "sequence";
      if (!isSequence && kind !== "optional" && kind !== "utf16") {
        // Other item types are not indexed
        continue;
      }

      record(prefixes, index, baseItem.regex, "full");
      record(suffixes, index, baseItem.regex, "full");
      if (!isSequence) {
        continue;
      }

      // Split the sequence at every inner position
      const parts = baseItem.items;
      for (let cut = 1; cut < parts.length; cut++) {
        const head = createSequenceEmojiRegexItem(parts.slice(0, cut));
        record(prefixes, index, head.regex, cut);
        const tail = createSequenceEmojiRegexItem(parts.slice(cut));
        record(suffixes, index, tail.regex, cut);
      }
    }

    let best;
    // Fold one dictionary into `best`, keeping every candidate tied for the top score
    const consider = (bucket, type) => {
      for (const regex in bucket) {
        const { score, slices } = bucket[regex];
        if (!score) {
          continue;
        }
        const candidate = { type, slices };
        if (best && best.score === score) {
          best.sequences.push(candidate);
        } else if (!best || best.score < score) {
          best = {
            score,
            sequences: [candidate]
          };
        }
      }
    };
    consider(prefixes, "start");
    consider(suffixes, "end");
    return best;
  }
  /**
   * Collapse the items named by one merge candidate into a single sequence
   * item and return it together with the untouched items.
   *
   * The shared part is taken from the item with the longest match. The parts
   * that differ between the merged items are gathered into a set, which is
   * wrapped as optional when at least one item matched in full. That chunk is
   * appended to the shared part for a "start" merge and prepended otherwise.
   *
   * @param {Array<object>} items - Regex items the slices index into.
   * @param {{type: string, slices: Array<{index: number, slice: (number|string)}>}} merge
   *   Candidate to apply; `slice` is "full" or a split position.
   * @param {Function} [optimise] - Optional callback run on the set of
   *   differing parts when that set holds more or fewer than one entry.
   * @returns {Array<object>} New sequence item first, then every item whose
   *   index was not part of the merge, in original order.
   * @throws {Error} If a partial slice points at a non-sequence item, or no
   *   common part can be determined.
   */
  function mergeSimilarRegexItemSequences(items, merge, optimise) {
    const { type, slices } = merge;
    const fromStart = type === "start";
    const mergedIndexes = /* @__PURE__ */ new Set();
    const remainders = [];
    let sawFullMatch = false;
    let bestLength = 0;
    let bestIndex = -1;

    for (const { index, slice } of slices) {
      const item = items[index];
      let matched;
      if (slice !== "full") {
        if (item.type !== "sequence") {
          throw new Error(
            `Unexpected partial match for type "${item.type}"`
          );
        }
        const parts = item.items;
        matched = fromStart ? slice : parts.length - slice;
        // Keep the side of the sequence that is not shared
        remainders.push(fromStart ? parts.slice(slice) : parts.slice(0, slice));
      } else {
        sawFullMatch = true;
        matched = item.type === "sequence" ? item.items.length : 1;
      }
      if (matched > bestLength) {
        bestIndex = index;
        bestLength = matched;
      }
      mergedIndexes.add(index);
    }

    if (bestLength < 1 || bestIndex < 0) {
      throw new Error("Cannot find common sequence");
    }

    // Copy the shared part out of the item with the longest match
    const anchor = items[bestIndex];
    let sequence;
    if (anchor.type === "sequence") {
      const parts = anchor.items;
      sequence = fromStart
        ? parts.slice(0, bestLength)
        : parts.slice(parts.length - bestLength);
    } else {
      if (bestLength !== 1) {
        throw new Error(
          "Something went wrong. Cannot have long match in non-sequence"
        );
      }
      sequence = [anchor];
    }

    // A single leftover item is used directly, longer leftovers become sequences
    const setItems = remainders.map(
      (list) => (list.length === 1 ? list[0] : createSequenceEmojiRegexItem(list))
    );
    const set = createSetEmojiRegexItem(setItems);

    let mergedChunk;
    if (set.sets.length === 1) {
      // Do not run callback if only 1 item
      mergedChunk = set.sets[0];
    } else if (optimise) {
      // Run callback to optimise it
      mergedChunk = optimise(set);
    } else {
      // Use set as is
      mergedChunk = set;
    }
    if (sawFullMatch) {
      mergedChunk = createOptionalEmojiRegexItem(mergedChunk);
    }

    if (fromStart) {
      sequence.push(mergedChunk);
    } else {
      sequence.unshift(mergedChunk);
    }

    const untouched = items.filter((_, position) => !mergedIndexes.has(position));
    return [createSequenceEmojiRegexItem(sequence), ...untouched];
  }
  /**
   * Repeatedly merge items of a set that share a common beginning or ending
   * until no further merge candidate is found.
   *
   * The set is first passed through `optimiseNumbersSet`. In each round the
   * best-scoring candidates are looked up; when several tie, each one is
   * applied to its own copy of the items and the result with the shortest
   * regex is kept (the first one wins on equal length). This function passes
   * itself as the optimise callback for the merged parts.
   *
   * @param {object} set - Set item whose entries are in `set.sets`.
   * @returns {object} The resulting item: a set, or a non-set item as soon as
   *   an optimisation or merge step produces one.
   * @throws {Error} If a merge result reports an empty candidate list.
   */
  function mergeSimilarItemsInSet(set) {
    const optimised = optimiseNumbersSet(set);
    if (optimised.type !== "set") {
      return optimised;
    }
    let current = optimised;

    // Apply one candidate to cloned items
    // NOTE(review): the `true` flag presumably requests a deep clone, confirm in base.mjs
    const applyMerge = (candidate) =>
      mergeSimilarRegexItemSequences(
        current.sets.map((item) => cloneEmojiRegexItem(item, true)),
        candidate,
        mergeSimilarItemsInSet
      );

    for (;;) {
      const merges = findSimilarRegexItemSequences(current.sets);
      if (!merges) {
        return current;
      }
      const { sequences } = merges;

      if (sequences.length === 1) {
        // Only one candidate: no need to compare alternatives
        const merged = applyMerge(sequences[0]);
        if (merged.length === 1) {
          return merged[0];
        }
        current = createSetEmojiRegexItem(merged);
        continue;
      }

      // Several candidates tie: try each and keep the shortest regex
      let shortest;
      for (const candidate of sequences) {
        const merged = applyMerge(candidate);
        const option = merged.length === 1 ? merged[0] : createSetEmojiRegexItem(merged);
        if (!shortest || option.regex.length < shortest.regex.length) {
          shortest = option;
        }
      }
      if (!shortest) {
        throw new Error("Empty sequences list");
      }
      if (shortest.type !== "set") {
        return shortest;
      }
      current = shortest;
    }
  }
  214. export { findSimilarRegexItemSequences, mergeSimilarItemsInSet, mergeSimilarRegexItemSequences };