/**
 * Splits a title into word tokens in a Unicode-aware way.
 *
 * A token is a maximal run that starts with a letter or number and continues
 * with letters, numbers, or combining marks. Everything else (whitespace,
 * punctuation, symbols) acts as a separator and is discarded.
 *
 * Pattern `/[\p{L}\p{N}][\p{L}\p{N}\p{M}]*​/gu` (shown here with a zero-width
 * break so the comment stays open):
 *   - `\p{L}` matches any letter from any script (Latin, Kana, Han, ...).
 *   - `\p{N}` matches any character in the Unicode Number category: decimal
 *     digits, but also letter numbers (e.g. `Ⅷ`) and other numerics (e.g. `½`).
 *   - `\p{M}` matches combining marks (accents, dakuten, Indic vowel signs).
 *     They are only accepted after a leading letter/number so that a stray
 *     mark never forms a token on its own.
 *   - `u` enables Unicode mode (required for `\p{...}`); `g` returns every
 *     match instead of only the first.
 *
 * @param sequence - The text to tokenize.
 * @returns The tokens in order of appearance; an empty array when the input
 *   contains no letters or numbers.
 */
export default function titleTokenizer(sequence: string): string[] {
  // Combining marks must stay attached to their base character; treating them
  // as separators would turn decomposed "cafe\u0301" into "cafe" and break
  // scripts such as Devanagari into single consonants.
  const tokenPattern = /[\p{L}\p{N}][\p{L}\p{N}\p{M}]*/gu;

  // With the `g` flag, `match` returns a plain array of matched substrings,
  // or `null` when nothing matches. No empty tokens can be produced.
  return sequence.match(tokenPattern) ?? [];
}
