diff --git a/lib/shared/search-index.js b/lib/shared/search-index.js index 73e5ab84a..432b2cadb 100644 --- a/lib/shared/search-index.js +++ b/lib/shared/search-index.js @@ -1,82 +1,86 @@ // @flow import Tokenizer from 'tokenize-text'; type Token = { index: number, match: { [i: number]: string, index: number, input: string, ... }, offset: number, value: string, ... }; class SearchIndex { tokenize: (str: string) => Token[]; fullTextIndex: { [token: string]: Set }; partialTextIndex: { [token: string]: Set }; constructor() { this.tokenize = new Tokenizer().words(); this.fullTextIndex = {}; this.partialTextIndex = {}; } + addAllPrefixes(id: string, value: string): void { + if (this.fullTextIndex[value] === undefined) { + this.fullTextIndex[value] = new Set(); + } + this.fullTextIndex[value].add(id); + let partialString = ''; + for (let i = 0; i < value.length; i++) { + const char = value[i]; + partialString += char; + // TODO probably should do some stopwords here + if (this.partialTextIndex[partialString] === undefined) { + this.partialTextIndex[partialString] = new Set(); + } + this.partialTextIndex[partialString].add(id); + } + } + addEntry(id: string, rawText: string) { const keywords = this.tokenize(rawText); for (const keyword of keywords) { const value = keyword.value.toLowerCase(); - if (this.fullTextIndex[value] === undefined) { - this.fullTextIndex[value] = new Set(); - } - this.fullTextIndex[value].add(id); - let partialString = ''; - for (let i = 0; i < value.length; i++) { - const char = value[i]; - partialString += char; - // TODO probably should do some stopwords here - if (this.partialTextIndex[partialString] === undefined) { - this.partialTextIndex[partialString] = new Set(); - } - this.partialTextIndex[partialString].add(id); - } + this.addAllPrefixes(id, value); } } getSearchResults(query: string): string[] { const keywords = this.tokenize(query); if (keywords.length === 0) { return []; } const lastKeyword = keywords[keywords.length - 1]; const lastKeywordValue = lastKeyword.value.toLowerCase(); const lastMatchSet = lastKeyword.match.input.match(/\S$/) ? this.partialTextIndex[lastKeywordValue] : this.fullTextIndex[lastKeywordValue]; if (!lastMatchSet) { return []; } const fullKeywords = keywords.slice(0, -1).map(k => k.value.toLowerCase()); let possibleMatches: string[] = Array.from(lastMatchSet); for (const keyword of fullKeywords) { const fullMatches = this.fullTextIndex[keyword]; if (!fullMatches) { return []; } possibleMatches = possibleMatches.filter(id => fullMatches.has(id)); if (possibleMatches.length === 0) { return []; } } return possibleMatches; } } export default SearchIndex; diff --git a/lib/shared/sentence-prefix-search-index.js b/lib/shared/sentence-prefix-search-index.js new file mode 100644 index 000000000..4848f2f0b --- /dev/null +++ b/lib/shared/sentence-prefix-search-index.js @@ -0,0 +1,34 @@ +// @flow + +import Tokenizer from 'tokenize-text'; + +import SearchIndex from './search-index.js'; + +class SentencePrefixSearchIndex extends SearchIndex { + entries: Set; + + constructor() { + super(); + this.tokenize = new Tokenizer().re(/\S+/); + this.entries = new Set(); + } + + addEntry(id: string, rawText: string) { + const keywords = this.tokenize(rawText); + for (const keyword of keywords) { + const value = rawText.slice(keyword.index).toLowerCase(); + this.addAllPrefixes(id, value); + } + this.entries.add(id); + } + + getSearchResults(query: string): string[] { + const transformedQuery = query.toLowerCase(); + if (this.partialTextIndex[transformedQuery]) { + return Array.from(this.partialTextIndex[transformedQuery]); + } + return []; + } +} + +export default SentencePrefixSearchIndex;