import { JaroWinklerDistance as jaroWinkler } from './jaro-winkler';
import { isStopWord } from './stop-words';

/** Default for `Options.scoreThreshold`: the lowest Jaro-Winkler score still accepted as a typo match. */
const DEFAULT_JARO_WINKLER_THRESHOLD = 0.9;
/** Default for `Options.evaluationThreshold`: a query needs at least one token of this length to be evaluated. */
const TYPO_EVALUATION_THRESHOLD = 3;
/** Default for `Options.maxCharacterDelta`: token pairs whose lengths differ by this much or more skip the Jaro-Winkler comparison. */
const MAX_CHARACTER_DELTA = 2;

/** Tuning knobs for `typoMatch`. */
type Options = {
  /**
   * Minimum Jaro-Winkler score (inclusive) a query/title token pair must
   * reach to count as a typo match.
   */
  scoreThreshold: number;
  /**
   * If every query token (after optional stop-word removal) is shorter than
   * this many characters, `typoMatch` returns false without attempting any
   * matching.
   */
  evaluationThreshold: number;
  /**
   * Applies to the Jaro-Winkler comparison only, not to exact prefix matches:
   * token pairs whose lengths differ by this many characters or more are
   * never compared.
   */
  maxCharacterDelta: number;
  /**
   * When true, tokens recognised by `isStopWord` are removed from both the
   * query and the title tokens before any matching is done.
   */
  removeStopWords: boolean;
};

/** Default options; `typoMatch` uses these when the caller does not pass an options argument. */
const doptions: Options = {
  scoreThreshold: DEFAULT_JARO_WINKLER_THRESHOLD,
  evaluationThreshold: TYPO_EVALUATION_THRESHOLD,
  maxCharacterDelta: MAX_CHARACTER_DELTA,
  removeStopWords: true,
};

// TODO: consider dynamic thresholds based on query length
/**
 * Decides whether any query token matches any title token, tolerating typos.
 *
 * Matching happens in two stages, and the first hit wins:
 *  1. exact prefix: some title token starts with some query token;
 *  2. fuzzy: some query/title token pair has similar length, shares its first
 *     character and reaches the Jaro-Winkler score threshold.
 *
 * Empty tokens are ignored, and stop words are ignored as well when
 * `removeStopWords` is enabled. If no remaining query token is at least
 * `evaluationThreshold` characters long, the query is considered too short
 * and the result is false. Once a query passes that check, all of its
 * remaining tokens (including the short ones) take part in both stages.
 *
 * @param qtokens - tokens of the search query
 * @param ttokens - tokens of the title being tested
 * @param options - overrides for the default thresholds; omitted fields fall
 *   back to the module defaults
 * @returns true if at least one query token matches at least one title token
 */
export function typoMatch(
  qtokens: readonly string[],
  ttokens: readonly string[],
  options: Partial<Options> = doptions,
): boolean {
  // Fill in anything the caller left out so a single override is enough.
  const {
    scoreThreshold = doptions.scoreThreshold,
    evaluationThreshold = doptions.evaluationThreshold,
    maxCharacterDelta = doptions.maxCharacterDelta,
    removeStopWords = doptions.removeStopWords,
  } = options;

  // An empty string is a prefix of every string, so an empty query token
  // would make the prefix stage succeed for any title. Empty title tokens can
  // never match a non-empty query token, so dropping them is harmless.
  const isUsable = (token: string): boolean =>
    token.length > 0 && (!removeStopWords || !isStopWord(token));

  const queryTokens = qtokens.filter(isUsable);
  const titleTokens = ttokens.filter(isUsable);

  // Query too short for evaluation. Note that `every` is true for an empty
  // list, so a query with no usable tokens also ends here.
  if (queryTokens.every((qtoken) => qtoken.length < evaluationThreshold)) {
    return false;
  }

  // Stage 1: exact prefix match. Checked for all pairs first so the cheaper
  // comparison can short-circuit before any Jaro-Winkler score is computed.
  const prefixMatch = queryTokens.some((qtoken) =>
    titleTokens.some((ttoken) => ttoken.startsWith(qtoken)),
  );
  if (prefixMatch) return true;

  // Stage 2: Jaro-Winkler similarity.
  return queryTokens.some((qtoken) =>
    titleTokens.some((ttoken) => {
      // don't consider tokens of significantly different length
      const delta = Math.abs(qtoken.length - ttoken.length);
      if (delta >= maxCharacterDelta) return false;

      // don't consider tokens that start with different characters
      if (qtoken[0] !== ttoken[0]) return false;

      return jaroWinkler(qtoken, ttoken) >= scoreThreshold;
    }),
  );
}
