keyserver/src/database/search-utils.js
// @flow

import natural from 'natural';

import type { RawMessageInfo } from 'lib/types/message-types';
import { messageTypes } from 'lib/types/message-types.js';

import { dbQuery, SQL } from '../database/database.js';

async function processMessagesForSearch(
  messages: $ReadOnlyArray<RawMessageInfo | ProcessedForSearchRow>,
): Promise<void> {
  const processedMessages = [];
  for (const msg of messages) {
    if (
      msg.type !== messageTypes.TEXT &&
      msg.type !== messageTypes.EDIT_MESSAGE
    ) {
Show All 20 Lines

  await dbQuery(SQL`
    INSERT INTO message_search (original_message_id, message_id, processed_content)
    VALUES ${processedMessages}
    ON DUPLICATE KEY UPDATE
      message_id = VALUE(message_id),
      processed_content = VALUE(processed_content);
  `);
}
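The 20 lines collapsed above ("Show All 20 Lines") are not visible in this view. As a rough, hedged sketch only (the helper name and row shape below are assumptions, not part of this diff), the per-message step might normalize message text with natural's Porter stemmer before the rows are inserted:

```js
// Hypothetical sketch — not the collapsed diff content.
// natural.PorterStemmer.tokenizeAndStem tokenizes, lowercases, drops
// stopwords, and stems in one pass.
function processTextForSearch(text: string): string {
  return natural.PorterStemmer.tokenizeAndStem(text).join(' ');
}

// For a TEXT message the row would presumably index itself; for an
// EDIT_MESSAGE it would index under the edited message's id (column order
// assumed to match the INSERT above):
// processedMessages.push([originalMessageID, msg.id, processTextForSearch(msg.text)]);
```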
type ProcessedForSearchRowText = {
  +type: 0,
  +id: string,
  +text: string,
};
type ProcessedForSearchRowEdit = {
  +type: 20,
  +id: string,
  +targetMessageID: string,
  +text: string,
};
type ProcessedForSearchRow =
  | ProcessedForSearchRowText
  | ProcessedForSearchRowEdit;
function processRowsForSearch(
  rows: $ReadOnlyArray<any>,
): $ReadOnlyArray<ProcessedForSearchRow> {
  const results = [];
  for (const row of rows) {
    if (row.type === messageTypes.TEXT) {
      results.push({ type: row.type, id: row.id, text: row.content });
    } else if (row.type === messageTypes.EDIT_MESSAGE) {
      results.push({
        type: row.type,
        id: row.id,
        targetMessageID: row.target_message,
        text: row.content,
      });
    }
  }
  return results;
}
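For illustration, here is how processRowsForSearch maps raw rows (with the column names used by the SELECT further down) into the ProcessedForSearchRow union; the sample values are made up:

```js
// Illustrative input only — shapes follow `SELECT id, type, content, target_message`.
const exampleRows = [
  { id: '101', type: 0, content: 'hello world', target_message: null },
  { id: '102', type: 20, content: 'hello world!', target_message: '101' },
];
const processed = processRowsForSearch(exampleRows);
// processed is:
// [
//   { type: 0, id: '101', text: 'hello world' },
//   { type: 20, id: '102', targetMessageID: '101', text: 'hello world!' },
// ]
```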
const pageSize = 1001;

async function processMessagesInDBForSearch(): Promise<void> {
  let lastID = 0;
inka: I don't think I can avoid this `let` in a reasonable way | |||||
  while (true) {
    const [messages] = await dbQuery(SQL`
      SELECT id, type, content, target_message
      FROM messages
      WHERE (type = ${messageTypes.TEXT} OR type = ${messageTypes.EDIT_MESSAGE})
        AND id > ${lastID}
      ORDER BY id
      LIMIT ${pageSize}
    `);
    const truncatedMessages =
      messages.length < pageSize ? messages : messages.slice(0, -1);
ashoat: I understand your strategy of checking if the `LIMIT` is returned, and assuming we are down if fewer messages are returned. But I'm confused why you aren't processing the 1001st message, and are instead fetching it again. Shouldn't we process all of the messages, and set `lastID` to `messages[messages.length - 1].id`?

ashoat: s/down/done

inka (author): I don't understand what you mean by "s/down/done". Now that I think about this, I think this trick is not very useful in this case. The assumption was that it lets us know whether there is a next page to fetch, and saves us a query that would return nothing. But actually we have to run the same number of queries; we just send more data in total, since we repeat some. Example: if we had 3003 entries to fetch, then in the scenario where we process all 1001 we would run a 4th query that would return nothing, and in the scenario where we process 1000 out of 1001 we would still run the 4th query, it would just return something. That doesn't really help us. I will fix this and just fetch 1000.

ashoat: That makes sense. We do something similar in message-fetchers.js (assume that if the result count is the same as the `LIMIT`, then there is more to fetch), but the difference there is that we have another variable we pass to the client that indicates if we think the result count is "exhaustive" or not. If we weren't passing that variable to the client, then there would be no point.

ashoat: (Actually you can ignore my comment, I'm not sure it's relevant)
    const processedRows = processRowsForSearch(truncatedMessages);
    await processMessagesForSearch(processedRows);
    if (messages.length < pageSize) {
      break;
    }
    lastID = truncatedMessages[truncatedMessages.length - 1].id;
  }
}
export { processMessagesForSearch, processMessagesInDBForSearch };
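For reference, a hedged sketch of the simplification inka describes in the thread above: fetch a page, process every returned row, advance `lastID` from the last one, and stop when a page comes back short. The function name is hypothetical and, per the thread, `pageSize` would presumably become 1000 rather than 1001:

```js
// Sketch only — mirrors the names above, not part of this diff.
async function processMessagesInDBForSearchSimplified(): Promise<void> {
  let lastID = 0;
  while (true) {
    const [messages] = await dbQuery(SQL`
      SELECT id, type, content, target_message
      FROM messages
      WHERE (type = ${messageTypes.TEXT} OR type = ${messageTypes.EDIT_MESSAGE})
        AND id > ${lastID}
      ORDER BY id
      LIMIT ${pageSize}
    `);
    if (messages.length === 0) {
      break;
    }
    // Process every fetched row instead of holding back the last one
    await processMessagesForSearch(processRowsForSearch(messages));
    if (messages.length < pageSize) {
      break;
    }
    lastID = messages[messages.length - 1].id;
  }
}
```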