Revamp post filtering system (#18058)
* Add model for custom filter keywords

* Use CustomFilterKeyword internally

  Does not change the API.

* Fix /filters/edit and /filters/new

* Add migration tests

* Remove whole_word column from custom_filters (covered by custom_filter_keywords)

* Redesign /filters

  Instead of a list, present a card that displays more information and handles multiple keywords per filter.

* Redesign /filters/new and /filters/edit to add and remove keywords

  This adds a new gem dependency, cocoon, as well as an npm dependency, cocoon-js-vanilla. Those are used to easily populate and remove form fields from the user interface when manipulating multiple keyword filters at once.

* Add /api/v2/filters to edit filters with multiple keywords

  Entities:
  - `Filter`: `id`, `title`, `filter_action` (either `hide` or `warn`), `context`, `keywords`
  - `FilterKeyword`: `id`, `keyword`, `whole_word`

  API endpoints:
  - `GET /api/v2/filters` to list filters (including keywords)
  - `POST /api/v2/filters` to create a new filter; `keywords_attributes` can also be passed to create keywords in one request
  - `GET /api/v2/filters/:id` to read a particular filter
  - `PUT /api/v2/filters/:id` to update a filter; `keywords_attributes` can also be passed to edit, delete, or add keywords in one request
  - `DELETE /api/v2/filters/:id` to delete a particular filter
  - `GET /api/v2/filters/:id/keywords` to list keywords for a filter
  - `POST /api/v2/filters/:filter_id/keywords` to add a new keyword to a filter
  - `GET /api/v2/filter_keywords/:id` to read a particular keyword
  - `PUT /api/v2/filter_keywords/:id` to edit a particular keyword
  - `DELETE /api/v2/filter_keywords/:id` to delete a particular keyword

* Change from `irreversible` boolean to `action` enum

* Remove irrelevant `irreversible_must_be_within_context` check

* Fix /filters/new and /filters/edit to handle the new filter_action attribute

* Fix Rubocop/Codeclimate complaining about task names

* Refactor FeedManager#phrase_filtered?

  This moves regexp building and filter caching to the `CustomFilter` class. It does not change the functional behavior yet, but it changes how the cache is built: regexps are built per custom filter, so that filters can be matched independently while still offering caching.

* Perform server-side filtering and output the result in the REST API

* Fix numerous filters_changed events being sent when editing multiple keywords at once

* Add some tests

* Use the new API in the WebUI

  - Use client-side logic for filters we have fetched rules for, so that filter changes can be applied retroactively without reloading the UI.
  - Use server-side logic for filters we haven't fetched rules for yet (e.g. after a network error, or during initial timeline loading).

* Minor optimizations and refactoring

* Perform server-side filtering on the streaming server

* Change the wording of filter action labels

* Fix issues pointed out by linter

* Change design of “Show anyway” link in accordance with review comments

* Drop “irreversible” filtering behavior

* Move /api/v2/filter_keywords to /api/v1/filters/keywords

* Rename `filter_results` attribute to `filtered`

* Rename REST::LegacyFilterSerializer to REST::V1::FilterSerializer

* Fix systemChannelId value in streaming server

* Simplify code by removing client-side filtering code

  The simplification comes at a cost, though: filters aren't retroactively applied anymore.
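As a hedged sketch of the v2 endpoints listed above, this is what creating a filter with two keywords in one request could look like from a client; the instance URL and token are placeholders, and the commented response shape follows the entity list rather than a verified server response.

```js
// Hypothetical client sketch against POST /api/v2/filters.
// BASE and TOKEN are placeholders, not real values.
const BASE = 'https://mastodon.example';
const TOKEN = 'REPLACE_WITH_ACCESS_TOKEN';

(async () => {
  const res = await fetch(`${BASE}/api/v2/filters`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${TOKEN}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      title: 'Spoilers',
      context: ['home', 'public'],
      filter_action: 'warn',
      keywords_attributes: [
        { keyword: 'spoiler', whole_word: true },
        { keyword: 'leak', whole_word: true },
      ],
    }),
  });

  const filter = await res.json();
  // Expected shape per the entities above:
  // { id, title, filter_action, context, keywords: [{ id, keyword, whole_word }, ...] }
  console.log(filter.id, filter.keywords);
})();
```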
parent 5823ae70c4
commit 02851848e9

60 changed files with 1292 additions and 250 deletions
streaming/index.js

```diff
@@ -12,6 +12,7 @@ const url = require('url');
 const uuid = require('uuid');
 const fs = require('fs');
 const WebSocket = require('ws');
+const { JSDOM } = require('jsdom');

 const env = process.env.NODE_ENV || 'development';
 const alwaysRequireAuth = process.env.LIMITED_FEDERATION_MODE === 'true' || process.env.WHITELIST_MODE === 'true' || process.env.AUTHORIZED_FETCH === 'true';
```
```diff
@@ -503,6 +504,9 @@ const startWorker = async (workerId) => {
       if (event === 'kill') {
         log.verbose(req.requestId, `Closing connection for ${req.accountId} due to expired access token`);
         eventHandlers.onKill();
+      } else if (event === 'filters_changed') {
+        log.verbose(req.requestId, `Invalidating filters cache for ${req.accountId}`);
+        req.cachedFilters = null;
       }
     };
   };
```
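For context, `filters_changed` is published to the account's system channel whenever filters change on the Rails side. A sketch of an equivalent publisher, assuming ioredis; the channel name and payload shape are inferred from the handler above, not copied from Mastodon's actual Ruby code:

```js
// Sketch of the message the handler above reacts to (an assumption
// based on the handler, not Mastodon's real publisher).
const Redis = require('ioredis');
const redis = new Redis();

const notifyFiltersChanged = (accountId) =>
  redis.publish(`timeline:system:${accountId}`, JSON.stringify({ event: 'filters_changed' }));

notifyFiltersChanged(123);
```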
```diff
@@ -512,7 +516,8 @@ const startWorker = async (workerId) => {
    * @param {any} res
    */
   const subscribeHttpToSystemChannel = (req, res) => {
-    const systemChannelId = `timeline:access_token:${req.accessTokenId}`;
+    const accessTokenChannelId = `timeline:access_token:${req.accessTokenId}`;
+    const systemChannelId = `timeline:system:${req.accountId}`;

     const listener = createSystemMessageListener(req, {
```
```diff
@@ -523,9 +528,11 @@ const startWorker = async (workerId) => {
     });

     res.on('close', () => {
+      unsubscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
       unsubscribe(`${redisPrefix}${systemChannelId}`, listener);
     });

+    subscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
     subscribe(`${redisPrefix}${systemChannelId}`, listener);
   };

```
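These two hunks implement the `systemChannelId` fix called out in the commit message: `timeline:access_token:<token id>` keeps carrying token-scoped events such as `kill`, while a separate `timeline:system:<account id>` channel now carries account-scoped events such as `filters_changed`, so each connection subscribes to both and unsubscribes from both on close. The WebSocket handler receives the same treatment further down.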
```diff
@@ -674,17 +681,84 @@ const startWorker = async (workerId) => {
         queries.push(client.query('SELECT 1 FROM account_domain_blocks WHERE account_id = $1 AND domain = $2', [req.accountId, accountDomain]));
       }

+      if (!unpackedPayload.filter_results && !req.cachedFilters) {
+        queries.push(client.query('SELECT filter.id AS id, filter.phrase AS title, filter.context AS context, filter.expires_at AS expires_at, filter.action AS filter_action, keyword.keyword AS keyword, keyword.whole_word AS whole_word FROM custom_filter_keywords keyword JOIN custom_filters filter ON keyword.custom_filter_id = filter.id WHERE filter.account_id = $1 AND (filter.expires_at IS NULL OR filter.expires_at > NOW())', [req.accountId]));
+      }
+
       Promise.all(queries).then(values => {
         done();

-        if (values[0].rows.length > 0 || (values.length > 1 && values[1].rows.length > 0)) {
+        if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) {
           return;
         }

+        if (!unpackedPayload.filter_results && !req.cachedFilters) {
+          const filterRows = values[accountDomain ? 2 : 1].rows;
+
+          req.cachedFilters = filterRows.reduce((cache, row) => {
+            if (cache[row.id]) {
+              cache[row.id].keywords.push([row.keyword, row.whole_word]);
+            } else {
+              cache[row.id] = {
+                keywords: [[row.keyword, row.whole_word]],
+                expires_at: row.expires_at,
+                repr: {
+                  id: row.id,
+                  title: row.title,
+                  context: row.context,
+                  expires_at: row.expires_at,
+                  filter_action: row.filter_action,
+                },
+              };
+            }
+
+            return cache;
+          }, {});
+
+          Object.keys(req.cachedFilters).forEach((key) => {
+            req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => {
+              let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+              if (whole_word) {
+                if (/^[\w]/.test(expr)) {
+                  expr = `\\b${expr}`;
+                }
+
+                if (/[\w]$/.test(expr)) {
+                  expr = `${expr}\\b`;
+                }
+              }
+
+              return expr;
+            }).join('|'), 'i');
+          });
+        }
+
+        // Check filters
+        if (req.cachedFilters && !unpackedPayload.filter_results) {
+          const status = unpackedPayload;
+          const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n');
+          const searchIndex = JSDOM.fragment(searchContent).textContent;
+
+          const now = new Date();
+          payload.filter_results = [];
+          Object.values(req.cachedFilters).forEach((cachedFilter) => {
+            if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) {
+              const keyword_matches = searchIndex.match(cachedFilter.regexp);
+              if (keyword_matches) {
+                payload.filter_results.push({
+                  filter: cachedFilter.repr,
+                  keyword_matches,
+                });
+              }
+            }
+          });
+        }
+
         transmit();
       }).catch(err => {
+        done();
         log.error(err);
-        done();
       });
     });
   };
```
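The keyword-to-regexp step is the heart of the server-side matcher. Below is a self-contained sketch of the same technique (the helper name `toFilterRegexp` is ours, not Mastodon's): escape each keyword, then add `\b` anchors only where the keyword starts or ends with a word character, so keywords made of punctuation can still match.

```js
// Standalone version of the keyword-escaping / whole-word logic from the
// hunk above, runnable outside the streaming server.
const toFilterRegexp = (keywords) =>
  new RegExp(keywords.map(([keyword, wholeWord]) => {
    // Escape regexp metacharacters in the user-supplied keyword.
    let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    if (wholeWord) {
      // Only anchor with \b where a word character is adjacent; a leading
      // or trailing symbol (e.g. ':sad:') would otherwise never match.
      if (/^[\w]/.test(expr)) expr = `\\b${expr}`;
      if (/[\w]$/.test(expr)) expr = `${expr}\\b`;
    }

    return expr;
  }).join('|'), 'i');

const regexp = toFilterRegexp([['spoiler', true], [':sad:', false]]);
console.log(regexp.test('what a spoiler!')); // true: bounded by non-word chars
console.log(regexp.test('spoilers ahead'));  // false: whole_word blocks the partial match
```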
```diff
@@ -1009,7 +1083,8 @@ const startWorker = async (workerId) => {
    * @param {WebSocketSession} session
    */
   const subscribeWebsocketToSystemChannel = ({ socket, request, subscriptions }) => {
-    const systemChannelId = `timeline:access_token:${request.accessTokenId}`;
+    const accessTokenChannelId = `timeline:access_token:${request.accessTokenId}`;
+    const systemChannelId = `timeline:system:${request.accountId}`;

     const listener = createSystemMessageListener(request, {
```
```diff
@@ -1019,8 +1094,15 @@ const startWorker = async (workerId) => {

     });

+    subscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
     subscribe(`${redisPrefix}${systemChannelId}`, listener);

+    subscriptions[accessTokenChannelId] = {
+      listener,
+      stopHeartbeat: () => {
+      },
+    };
+
     subscriptions[systemChannelId] = {
       listener,
       stopHeartbeat: () => {
```
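Putting it together, a status delivered over the streaming API that trips a filter carries a `filter_results` entry shaped roughly as below; the values are illustrative, and the commit message notes the attribute was renamed to `filtered` in the REST API.

```js
// Illustrative shape of the field the filtering hunk above attaches to a
// streaming payload; every concrete value here is made up.
const examplePayload = {
  id: '109348712',
  content: '<p>huge spoiler ahead</p>',
  // ...other status fields...
  filter_results: [
    {
      filter: {
        id: '3',
        title: 'Spoilers',
        context: ['home', 'public'],
        expires_at: null,
        filter_action: 'warn',
      },
      keyword_matches: ['spoiler'],
    },
  ],
};
```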