/**
 * Words treated as stop words by the tokenizer in this file.
 *
 * All entries are lowercase and are matched with `Set.prototype.has`, i.e. by
 * exact string equality — no case folding or trimming is applied on lookup.
 * Each word must appear only once; keep the list duplicate-free so the literal
 * reflects the real contents of the set.
 */
const STOP_WORDS = new Set([
  'the',
  'a',
  'of',
  'and',
  'to',
  'in',
  'for',
  'that',
  'on',
  'is',
  'with',
  'at',
  'by',
  'it',
  'as',
  'but',
  'from',
  'be',
  'an',
  'was',
  'not',
  'this',
  'are',
  'has',
  'who',
  'he',
  'one',
  'or',
  'all',
  'if',
  'i',
  'no',
  'so',
  'we',
  'its',
  'do',
  'go',
  'our',
  'too',
  'my',
  'she',
  'her',
  'me',
]);

/**
 * Minimum `length` a token must have to be emitted on its own or to be paired
 * with an adjacent stop word.
 */
const MIN_WORD_LENGTH = 2;

/**
 * Builds the list of highlight terms from an ordered sequence of word tokens.
 *
 * Tokens are visited in order:
 * - A token that is not in `STOP_WORDS` and whose length is at least
 *   `MIN_WORD_LENGTH` is emitted unchanged.
 * - A stop word is never emitted alone. It is emitted joined by a single space
 *   to each neighbour that is itself a non-stop word of at least
 *   `MIN_WORD_LENGTH` characters: first `"<stop> <next>"`, then
 *   `"<prev> <stop>"`.
 *
 * Stop-word lookup is an exact match on the token as given.
 *
 * @param rawTokens - Word tokens in their original order.
 * @returns A newly built array of terms; `rawTokens` is only read.
 */
export function highlightTokenizer(rawTokens: Array<string>): Array<string> {
  // A neighbour can be paired with a stop word only when it is long enough
  // and is not a stop word itself.
  const canPairWith = (candidate: string): boolean =>
    candidate.length >= MIN_WORD_LENGTH && !STOP_WORDS.has(candidate);

  return rawTokens.reduce<string[]>((terms, token, position) => {
    // Regular words: keep them when long enough, then move on.
    if (!STOP_WORDS.has(token)) {
      if (token.length >= MIN_WORD_LENGTH) {
        terms.push(token);
      }
      return terms;
    }

    // Stop words: a missing neighbour (start/end of input) falls back to ''
    // and therefore never qualifies for pairing.
    const following = rawTokens[position + 1] || '';
    if (canPairWith(following)) {
      terms.push(`${token} ${following}`);
    }

    const preceding = rawTokens[position - 1] || '';
    if (canPairWith(preceding)) {
      terms.push(`${preceding} ${token}`);
    }

    return terms;
  }, []);
}
