| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- import { createSequenceEmojiRegexItem, createSetEmojiRegexItem, createOptionalEmojiRegexItem, cloneEmojiRegexItem } from './base.mjs';
- import { optimiseNumbersSet } from './numbers.mjs';
- import '../convert.mjs';
- import '../data.mjs';
/**
 * Look for regex chunks shared by several items, either at the start or at
 * the end, and report the chunk(s) with the highest score.
 *
 * Every item is registered by its full `regex`; items of type "sequence" are
 * additionally registered by each proper prefix and suffix of their `items`.
 * The first occurrence of a regex scores 0 and each further occurrence adds
 * the length of that regex, so a chunk seen only once is never reported.
 *
 * @param items - List of regex items (objects with `type` and `regex`, plus
 *   `items` for sequences).
 * @returns `undefined` when nothing is shared, otherwise
 *   `{ score, sequences }` where `sequences` holds every `{ type, slices }`
 *   candidate tied for the best score (`type` is "start" or "end").
 * @throws {Error} If one of the items has type "set".
 */
function findSimilarRegexItemSequences(items) {
  // Null-prototype dictionaries keyed by regex source
  const prefixes = /* @__PURE__ */ Object.create(null);
  const suffixes = /* @__PURE__ */ Object.create(null);

  // Register one occurrence of `regex` for the item at `index`
  const record = (dictionary, index, regex, slice) => {
    const known = dictionary[regex];
    if (known) {
      known.score += regex.length;
      known.slices.push({ index, slice });
      return;
    }
    // Start with 0. One item will remain after replacement
    dictionary[regex] = {
      score: 0,
      slices: [{ index, slice }]
    };
  };

  for (let position = 0; position < items.length; position += 1) {
    const current = items[position];
    const kind = current.type;

    if (kind === "set") {
      throw new Error("Unexpected set within a set");
    }
    if (kind !== "optional" && kind !== "utf16" && kind !== "sequence") {
      // Other types are not indexed
      continue;
    }

    record(prefixes, position, current.regex, "full");
    record(suffixes, position, current.regex, "full");
    if (kind !== "sequence") {
      continue;
    }

    // Index every proper prefix and the matching suffix of the sequence
    const parts = current.items;
    for (let cut = 1; cut < parts.length; cut += 1) {
      const head = createSequenceEmojiRegexItem(parts.slice(0, cut));
      record(prefixes, position, head.regex, cut);
      const tail = createSequenceEmojiRegexItem(parts.slice(cut));
      record(suffixes, position, tail.regex, cut);
    }
  }

  let best;
  // Fold one dictionary into `best`, keeping all candidates tied for the top score
  const consider = (dictionary, type) => {
    for (const { score, slices } of Object.values(dictionary)) {
      if (!score) {
        // Seen only once: nothing to merge
        continue;
      }
      const candidate = { type, slices };
      if (best && best.score === score) {
        best.sequences.push(candidate);
      } else if (!best || best.score < score) {
        best = { score, sequences: [candidate] };
      }
    }
  };

  consider(prefixes, "start");
  consider(suffixes, "end");
  return best;
}
/**
 * Merge the items that share a common start or end chunk into one sequence.
 *
 * The shared chunk is taken from the item with the longest match; the parts
 * of the other items that differ are collected into a set, which becomes
 * optional when at least one item matched in full. The resulting sequence is
 * returned first, followed by every item that was not part of the merge.
 *
 * @param items - List of regex items that `merge.slices` indexes into.
 * @param merge - `{ type, slices }`; `type` is "start" for a shared prefix
 *   (any other value is handled as a shared suffix) and each slice is
 *   `{ index, slice }` where `slice` is "full" or a split position.
 * @param optimise - Optional callback applied to the set of differing parts
 *   when that set has more or fewer than one entry.
 * @returns New list: merged sequence followed by the untouched items.
 * @throws {Error} On a partial slice for a non-sequence item, when no common
 *   chunk can be found, or when a non-sequence item has a match longer than 1.
 */
function mergeSimilarRegexItemSequences(items, merge, optimise) {
  const { type, slices } = merge;
  const fromStart = type === "start";

  const mergedIndexes = /* @__PURE__ */ new Set();
  const remainders = [];
  let sawWholeItem = false;
  let bestLength = 0;
  let bestIndex = -1;

  for (const { index, slice } of slices) {
    const source = items[index];
    const isSequence = source.type === "sequence";
    let matched;

    if (slice === "full") {
      // Whole item equals the common chunk, so the remainder is optional
      sawWholeItem = true;
      matched = isSequence ? source.items.length : 1;
    } else if (!isSequence) {
      throw new Error(
        `Unexpected partial match for type "${source.type}"`
      );
    } else {
      const parts = source.items;
      matched = fromStart ? slice : parts.length - slice;
      // Keep the part of the sequence that is not shared
      remainders.push(fromStart ? parts.slice(slice) : parts.slice(0, slice));
    }

    if (matched > bestLength) {
      bestIndex = index;
      bestLength = matched;
    }
    mergedIndexes.add(index);
  }

  if (bestLength < 1 || bestIndex < 0) {
    throw new Error("Cannot find common sequence");
  }

  // Extract the shared chunk from the item with the longest match
  const anchor = items[bestIndex];
  let sequence;
  if (anchor.type === "sequence") {
    const parts = anchor.items;
    sequence = fromStart
      ? parts.slice(0, bestLength)
      : parts.slice(parts.length - bestLength);
  } else {
    if (bestLength !== 1) {
      throw new Error(
        "Something went wrong. Cannot have long match in non-sequence"
      );
    }
    sequence = [anchor];
  }

  // Differing parts: a single item stays as is, longer lists become sequences
  const variants = remainders.map(
    (list) => list.length === 1 ? list[0] : createSequenceEmojiRegexItem(list)
  );
  const set = createSetEmojiRegexItem(variants);

  let mergedChunk;
  if (set.sets.length === 1) {
    // Do not run callback if only 1 item
    mergedChunk = set.sets[0];
  } else if (optimise) {
    // Run callback to optimise it
    mergedChunk = optimise(set);
  } else {
    // Use set as is
    mergedChunk = set;
  }
  if (sawWholeItem) {
    mergedChunk = createOptionalEmojiRegexItem(mergedChunk);
  }

  // Differing part goes after a shared prefix or before a shared suffix
  if (fromStart) {
    sequence.push(mergedChunk);
  } else {
    sequence.unshift(mergedChunk);
  }

  const mergedItem = createSequenceEmojiRegexItem(sequence);
  const untouched = items.filter((_, position) => !mergedIndexes.has(position));
  return [mergedItem, ...untouched];
}
/**
 * Repeatedly merge items of a set that share a common start or end chunk.
 *
 * The set is first passed through `optimiseNumbersSet`; if that no longer
 * yields a set, its result is returned as is. Afterwards the best merge
 * candidates are looked up and applied until none remain. When several
 * candidates tie, each is tried on a fresh clone of the items and the one
 * producing the shortest regex wins (the earliest candidate wins ties).
 *
 * @param set - Set item whose `sets` list should be optimised.
 * @returns The optimised item: a set, or a single item when the merge
 *   collapsed the set.
 * @throws {Error} If a lookup reports an empty candidates list.
 */
function mergeSimilarItemsInSet(set) {
  const optimised = optimiseNumbersSet(set);
  if (optimised.type !== "set") {
    return optimised;
  }

  let current = optimised;
  for (;;) {
    const found = findSimilarRegexItemSequences(current.sets);
    if (!found) {
      // Nothing left to merge
      return current;
    }

    // Apply one candidate to cloned items so attempts do not affect each other
    const attempt = (candidate) => mergeSimilarRegexItemSequences(
      current.sets.map((item) => cloneEmojiRegexItem(item, true)),
      candidate,
      mergeSimilarItemsInSet
    );

    const candidates = found.sequences;
    if (candidates.length === 1) {
      const merged = attempt(candidates[0]);
      if (merged.length === 1) {
        return merged[0];
      }
      current = createSetEmojiRegexItem(merged);
      continue;
    }

    // Several candidates with the same score: keep the shortest result
    let shortest;
    for (const candidate of candidates) {
      const merged = attempt(candidate);
      const option = merged.length === 1 ? merged[0] : createSetEmojiRegexItem(merged);
      if (!shortest || option.regex.length < shortest.regex.length) {
        shortest = option;
      }
    }
    if (!shortest) {
      throw new Error("Empty sequences list");
    }
    if (shortest.type !== "set") {
      return shortest;
    }
    current = shortest;
  }
}
- export { findSimilarRegexItemSequences, mergeSimilarItemsInSet, mergeSimilarRegexItemSequences };
|