keyserver/src/database/search-utils.js
// @flow

import natural from 'natural';

import type { RawMessageInfo } from 'lib/types/message-types';
import { messageTypes } from 'lib/types/message-types.js';

import { dbQuery, SQL } from '../database/database.js';

async function processMessagesForSearch(
  messages: $ReadOnlyArray<RawMessageInfo | ProcessedForSearchRow>,
): Promise<void> {
  const processedMessages = [];
  for (const msg of messages) {
    if (
      msg.type !== messageTypes.TEXT &&
      msg.type !== messageTypes.EDIT_MESSAGE
    ) {
Show All 20 Lines

  await dbQuery(SQL`
    INSERT INTO message_search (original_message_id, message_id, processed_content)
    VALUES ${processedMessages}
    ON DUPLICATE KEY UPDATE
      message_id = VALUE(message_id),
      processed_content = VALUE(processed_content);
  `);
}
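The 20 lines collapsed above ("Show All 20 Lines") are not visible in this view. As a rough, hedged sketch only (the helper name and row shape below are assumptions, not part of this diff), the per-message step might normalize message text with natural's Porter stemmer before the rows are inserted:

```js
// Hypothetical sketch — not the collapsed diff content.
// natural.PorterStemmer.tokenizeAndStem tokenizes, lowercases, drops
// stopwords, and stems in one pass.
function processTextForSearch(text: string): string {
  return natural.PorterStemmer.tokenizeAndStem(text).join(' ');
}

// For a TEXT message the row would presumably index itself; for an
// EDIT_MESSAGE it would index under the edited message's id (column order
// assumed to match the INSERT above):
// processedMessages.push([originalMessageID, msg.id, processTextForSearch(msg.text)]);
```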
type ProcessedForSearchRowText = {
  +type: 0,
  +id: string,
  +text: string,
};
type ProcessedForSearchRowEdit = {
  +type: 20,
  +id: string,
  +targetMessageID: string,
  +text: string,
};
type ProcessedForSearchRow =
  | ProcessedForSearchRowText
  | ProcessedForSearchRowEdit;
function processRowsForSearch(
  rows: $ReadOnlyArray<any>,
): $ReadOnlyArray<ProcessedForSearchRow> {
  const results = [];
  for (const row of rows) {
    if (row.type === messageTypes.TEXT) {
      results.push({ type: row.type, id: row.id, text: row.content });
    } else if (row.type === messageTypes.EDIT_MESSAGE) {
      results.push({
        type: row.type,
        id: row.id,
        targetMessageID: row.target_message,
        text: row.content,
      });
    }
  }
  return results;
}
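For illustration, here is how processRowsForSearch maps raw rows (with the column names used by the SELECT further down) into the ProcessedForSearchRow union; the sample values are made up:

```js
// Illustrative input only — shapes follow `SELECT id, type, content, target_message`.
const exampleRows = [
  { id: '101', type: 0, content: 'hello world', target_message: null },
  { id: '102', type: 20, content: 'hello world!', target_message: '101' },
];
const processed = processRowsForSearch(exampleRows);
// processed is:
// [
//   { type: 0, id: '101', text: 'hello world' },
//   { type: 20, id: '102', targetMessageID: '101', text: 'hello world!' },
// ]
```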
const pageSize = 1001;

async function processMessagesInDBForSearch(): Promise<void> {
  let lastID = 0;
inka: I don't think I can avoid this `let` in a reasonable way | |||||
  while (true) {
    const [messages] = await dbQuery(SQL`
      SELECT id, type, content, target_message
      FROM messages
      WHERE (type = ${messageTypes.TEXT} OR type = ${messageTypes.EDIT_MESSAGE})
        AND id > ${lastID}
      ORDER BY id
      LIMIT ${pageSize}
    `);
    const truncatedMessages =
      messages.length < pageSize ? messages : messages.slice(0, -1);
ashoat: I understand your strategy of checking if the `LIMIT` is returned, and assuming we are down if fewer messages are returned. But I'm confused why you aren't processing the 1001st message, and are instead fetching it again. Shouldn't we process all of the messages, and set `lastID` to `messages[messages.length - 1].id`?

ashoat: s/down/done

inka (author): I don't understand what you mean by "s/down/done". Now that I think about this, I think this trick is not very useful in this case. The assumption was that it lets us know whether there is a next page to fetch, and saves us a query that would return nothing. But actually we have to run the same number of queries; we just send more data in total, since we repeat some. Example: if we had 3003 entries to fetch, then in the scenario where we process all 1001 we would run a 4th query that would return nothing, and in the scenario where we process 1000 out of 1001 we would still run the 4th query, it would just return something. That doesn't really help us. I will fix this and just fetch 1000.

ashoat: That makes sense. We do something similar in message-fetchers.js (assume that if the result count is the same as the `LIMIT`, then there is more to fetch), but the difference there is that we have another variable we pass to the client that indicates if we think the result count is "exhaustive" or not. If we weren't passing that variable to the client, then there would be no point.

ashoat: (Actually you can ignore my comment, I'm not sure it's relevant)
    const processedRows = processRowsForSearch(truncatedMessages);
    await processMessagesForSearch(processedRows);
    if (messages.length < pageSize) {
      break;
    }
    lastID = truncatedMessages[truncatedMessages.length - 1].id;
  }
}
export { processMessagesForSearch, processMessagesInDBForSearch };
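For reference, a hedged sketch of the simplification inka describes in the thread above: fetch a page, process every returned row, advance `lastID` from the last one, and stop when a page comes back short. The function name is hypothetical and, per the thread, `pageSize` would presumably become 1000 rather than 1001:

```js
// Sketch only — mirrors the names above, not part of this diff.
async function processMessagesInDBForSearchSimplified(): Promise<void> {
  let lastID = 0;
  while (true) {
    const [messages] = await dbQuery(SQL`
      SELECT id, type, content, target_message
      FROM messages
      WHERE (type = ${messageTypes.TEXT} OR type = ${messageTypes.EDIT_MESSAGE})
        AND id > ${lastID}
      ORDER BY id
      LIMIT ${pageSize}
    `);
    if (messages.length === 0) {
      break;
    }
    // Process every fetched row instead of holding back the last one
    await processMessagesForSearch(processRowsForSearch(messages));
    if (messages.length < pageSize) {
      break;
    }
    lastID = messages[messages.length - 1].id;
  }
}
```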