diff --git a/lib/shared/search-index.js b/lib/shared/search-index.js --- a/lib/shared/search-index.js +++ b/lib/shared/search-index.js @@ -15,13 +15,16 @@ ... }; +type TokenizeFunc = (str: string) => Token[]; +const defaultTokenize: TokenizeFunc = new Tokenizer().words(); + class SearchIndex { tokenize: (str: string) => Token[]; fullTextIndex: { [token: string]: Set }; partialTextIndex: { [token: string]: Set }; - constructor() { - this.tokenize = new Tokenizer().words(); + constructor(inputTokenize?: TokenizeFunc) { + this.tokenize = inputTokenize ?? defaultTokenize; this.fullTextIndex = {}; this.partialTextIndex = {}; } diff --git a/lib/shared/sentence-prefix-search-index.js b/lib/shared/sentence-prefix-search-index.js --- a/lib/shared/sentence-prefix-search-index.js +++ b/lib/shared/sentence-prefix-search-index.js @@ -4,12 +4,15 @@ import SearchIndex from './search-index.js'; +// The defaultTokenize used by SearchIndex splits on punctuation. +// We pass this alternative to super() instead, because we only want to split on whitespace. +const tokenize = new Tokenizer().re(/\S+/); + class SentencePrefixSearchIndex extends SearchIndex { entries: Set; constructor() { - super(); - this.tokenize = new Tokenizer().re(/\S+/); + super(tokenize); this.entries = new Set(); }