
Revamp post filtering system (#18058)

* Add model for custom filter keywords

* Use CustomFilterKeyword internally

Does not change the API

* Fix /filters/edit and /filters/new

* Add migration tests

* Remove whole_word column from custom_filters (covered by custom_filter_keywords)

* Redesign /filters

Instead of a list, present a card that displays more information and handles
multiple keywords per filter.

* Redesign /filters/new and /filters/edit to add and remove keywords

This adds a new gem dependency, cocoon, as well as an npm dependency,
cocoon-js-vanilla. Those are used to easily populate and remove form fields
from the user interface when manipulating filters with multiple keywords at once.

* Add /api/v2/filters to edit filter with multiple keywords

Entities (an illustrative example follows):
- `Filter`: `id`, `title`, `filter_action` (either `hide` or `warn`),
  `context`, `keywords`
- `FilterKeyword`: `id`, `keyword`, `whole_word`
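As a rough illustration (values are made up, field names follow the lists
above), a serialized `Filter` with its nested keywords could look like:

```js
// Hypothetical serialized Filter entity; values are invented,
// field names follow the entity lists above.
const exampleFilter = {
  id: '1',
  title: 'Spoilers',
  filter_action: 'warn',        // either 'hide' or 'warn'
  context: ['home', 'public'],  // where the filter applies
  keywords: [
    { id: '10', keyword: 'finale', whole_word: true },
    { id: '11', keyword: 'ending', whole_word: false },
  ],
};
```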

API endpoints (an example request follows this list):
- `GET /api/v2/filters` to list filters (including keywords)
- `POST /api/v2/filters` to create a new filter
  `keywords_attributes` can also be passed to create keywords in one request
- `GET /api/v2/filters/:id` to read a particular filter
- `PUT /api/v2/filters/:id` to update an existing filter
  `keywords_attributes` can also be passed to edit, delete or add keywords in
   one request
- `DELETE /api/v2/filters/:id` to delete a particular filter
- `GET /api/v2/filters/:id/keywords` to list keywords for a filter
- `POST /api/v2/filters/:filter_id/keywords` to add a new keyword to a
   filter
- `GET /api/v2/filter_keywords/:id` to read a particular keyword
- `PUT /api/v2/filter_keywords/:id` to edit a particular keyword
- `DELETE /api/v2/filter_keywords/:id` to delete a particular keyword
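For example, creating a filter together with two keywords in a single request
could look like the following sketch (the endpoint and `keywords_attributes`
are from the list above; the instance URL, token, and field values are
placeholders):

```js
// Sketch: create a filter and its keywords in one request.
// The instance URL and access token are placeholders.
const ACCESS_TOKEN = '...';

const response = await fetch('https://mastodon.example/api/v2/filters', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${ACCESS_TOKEN}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    title: 'Spoilers',
    context: ['home', 'public'],
    filter_action: 'warn',
    keywords_attributes: [
      { keyword: 'finale', whole_word: true },
      { keyword: 'ending', whole_word: false },
    ],
  }),
});

const filter = await response.json(); // includes the created keywords
```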

* Change from `irreversible` boolean to `action` enum

* Remove irrelevant `irreversible_must_be_within_context` check

* Fix /filters/new and /filters/edit with update for filter_action

* Fix Rubocop/Codeclimate complaining about task names

* Refactor FeedManager#phrase_filtered?

This moves regexp building and filter caching to the `CustomFilter` class.

This does not change the functional behavior yet, but it changes how the
cache is built: one regexp is now built per custom filter, so that filters
can be matched independently while still benefiting from caching.
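A minimal sketch of the per-filter regexp construction, assuming keywords
shaped like `{ keyword, whole_word }` (the function name is illustrative; the
escaping and word-boundary logic mirror the streaming-server diff further
down):

```js
// Build one case-insensitive regexp per filter by OR-ing its keywords,
// mirroring the keyword-escaping logic in the streaming diff below.
function buildFilterRegexp(keywords) {
  const parts = keywords.map(({ keyword, whole_word }) => {
    // Escape regexp metacharacters in the keyword.
    let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    if (whole_word) {
      // Only anchor on a word boundary where the keyword actually
      // starts/ends with a word character.
      if (/^[\w]/.test(expr)) expr = `\\b${expr}`;
      if (/[\w]$/.test(expr)) expr = `${expr}\\b`;
    }

    return expr;
  });

  return new RegExp(parts.join('|'), 'i');
}

// Whole-word keyword: matches "the finale!" but not "finales".
const re = buildFilterRegexp([{ keyword: 'finale', whole_word: true }]);
```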

* Perform server-side filtering and output result in REST API

* Fix numerous filters_changed events being sent when editing multiple keywords at once

* Add some tests

* Use the new API in the WebUI

- use client-side logic for the filters whose rules we have fetched.
  This is so that filter changes can be applied retroactively without
  reloading the UI.
- use server-side logic for the filters whose rules we haven't fetched yet
  (e.g. after a network error, or during initial timeline loading); a sketch
  of this decision follows.
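A minimal sketch of that client/server decision, assuming the status text has
already been flattened to plain text the way the streaming code below does
(all names and shapes here are illustrative, not the actual web UI code):

```js
// Hybrid filtering decision sketched from the description above.
// `fetchedFilters`: locally known rules keyed by filter id, each with a
// precompiled `regexp` (null when no rules have been fetched yet).
// `status.filter_results`: server-computed matches (renamed `filtered`
// later in this changeset).
function isStatusFiltered(status, plainText, fetchedFilters) {
  if (fetchedFilters !== null) {
    // Client-side: re-evaluate locally, so later filter edits apply
    // retroactively to statuses already loaded in the UI.
    return Object.values(fetchedFilters).some((filter) => filter.regexp.test(plainText));
  }

  // Server-side fallback: trust the matches the REST or streaming API
  // computed for us.
  return Array.isArray(status.filter_results) && status.filter_results.length > 0;
}
```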

* Minor optimizations and refactoring

* Perform server-side filtering on the streaming server

* Change the wording of filter action labels

* Fix issues pointed out by linter

* Change design of “Show anyway” link in accordance with review comments

* Drop “irreversible” filtering behavior

* Move /api/v2/filter_keywords to /api/v2/filters/keywords

* Rename `filter_results` attribute to `filtered`
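Judging from the `filter_results` objects assembled in the streaming diff
below, a filtered status would then carry entries shaped like this under the
new name (values are hypothetical):

```js
// Hypothetical `filtered` entry on a status, shaped after the
// `filter_results` objects built in the streaming diff below.
const filtered = [
  {
    filter: {
      id: '1',
      title: 'Spoilers',
      context: ['home', 'public'],
      expires_at: null,
      filter_action: 'warn',
    },
    keyword_matches: ['finale'],
  },
];
```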

* Rename REST::LegacyFilterSerializer to REST::V1::FilterSerializer

* Fix systemChannelId value in streaming server

* Simplify code by removing client-side filtering code

The simplification comes at a cost, though: filters aren't retroactively
applied anymore.
Claire 2022-06-28 09:42:13 +02:00 committed by GitHub
parent 5823ae70c4
commit 02851848e9
60 changed files with 1292 additions and 250 deletions

streaming/index.js

@@ -12,6 +12,7 @@ const url = require('url');
 const uuid = require('uuid');
 const fs = require('fs');
 const WebSocket = require('ws');
+const { JSDOM } = require('jsdom');

 const env = process.env.NODE_ENV || 'development';
 const alwaysRequireAuth = process.env.LIMITED_FEDERATION_MODE === 'true' || process.env.WHITELIST_MODE === 'true' || process.env.AUTHORIZED_FETCH === 'true';
@@ -503,6 +504,9 @@ const startWorker = async (workerId) => {
       if (event === 'kill') {
         log.verbose(req.requestId, `Closing connection for ${req.accountId} due to expired access token`);
         eventHandlers.onKill();
+      } else if (event === 'filters_changed') {
+        log.verbose(req.requestId, `Invalidating filters cache for ${req.accountId}`);
+        req.cachedFilters = null;
       }
     };
   };
@@ -512,7 +516,8 @@ const startWorker = async (workerId) => {
    * @param {any} res
    */
   const subscribeHttpToSystemChannel = (req, res) => {
-    const systemChannelId = `timeline:access_token:${req.accessTokenId}`;
+    const accessTokenChannelId = `timeline:access_token:${req.accessTokenId}`;
+    const systemChannelId = `timeline:system:${req.accountId}`;

     const listener = createSystemMessageListener(req, {
@@ -523,9 +528,11 @@ const startWorker = async (workerId) => {
     });

     res.on('close', () => {
+      unsubscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
       unsubscribe(`${redisPrefix}${systemChannelId}`, listener);
     });

+    subscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
     subscribe(`${redisPrefix}${systemChannelId}`, listener);
   };
@@ -674,17 +681,84 @@ const startWorker = async (workerId) => {
           queries.push(client.query('SELECT 1 FROM account_domain_blocks WHERE account_id = $1 AND domain = $2', [req.accountId, accountDomain]));
         }

+        if (!unpackedPayload.filter_results && !req.cachedFilters) {
+          queries.push(client.query('SELECT filter.id AS id, filter.phrase AS title, filter.context AS context, filter.expires_at AS expires_at, filter.action AS filter_action, keyword.keyword AS keyword, keyword.whole_word AS whole_word FROM custom_filter_keywords keyword JOIN custom_filters filter ON keyword.custom_filter_id = filter.id WHERE filter.account_id = $1 AND (filter.expires_at IS NULL OR filter.expires_at > NOW())', [req.accountId]));
+        }
+
         Promise.all(queries).then(values => {
           done();

-          if (values[0].rows.length > 0 || (values.length > 1 && values[1].rows.length > 0)) {
+          if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) {
             return;
           }

+          if (!unpackedPayload.filter_results && !req.cachedFilters) {
+            const filterRows = values[accountDomain ? 2 : 1].rows;
+
+            req.cachedFilters = filterRows.reduce((cache, row) => {
+              if (cache[row.id]) {
+                cache[row.id].keywords.push([row.keyword, row.whole_word]);
+              } else {
+                cache[row.id] = {
+                  keywords: [[row.keyword, row.whole_word]],
+                  expires_at: row.expires_at,
+                  repr: {
+                    id: row.id,
+                    title: row.title,
+                    context: row.context,
+                    expires_at: row.expires_at,
+                    filter_action: row.filter_action,
+                  },
+                };
+              }
+
+              return cache;
+            }, {});
+
+            Object.keys(req.cachedFilters).forEach((key) => {
+              req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => {
+                let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+                if (whole_word) {
+                  if (/^[\w]/.test(expr)) {
+                    expr = `\\b${expr}`;
+                  }
+                  if (/[\w]$/.test(expr)) {
+                    expr = `${expr}\\b`;
+                  }
+                }
+
+                return expr;
+              }).join('|'), 'i');
+            });
+          }
+
+          // Check filters
+          if (req.cachedFilters && !unpackedPayload.filter_results) {
+            const status = unpackedPayload;
+            const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n');
+            const searchIndex = JSDOM.fragment(searchContent).textContent;
+
+            const now = new Date();
+            payload.filter_results = [];
+            Object.values(req.cachedFilters).forEach((cachedFilter) => {
+              if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) {
+                const keyword_matches = searchIndex.match(cachedFilter.regexp);
+                if (keyword_matches) {
+                  payload.filter_results.push({
+                    filter: cachedFilter.repr,
+                    keyword_matches,
+                  });
+                }
+              }
+            });
+          }
+
           transmit();
         }).catch(err => {
-          done();
           log.error(err);
+          done();
         });
       });
     };
@@ -1009,7 +1083,8 @@ const startWorker = async (workerId) => {
    * @param {WebSocketSession} session
    */
   const subscribeWebsocketToSystemChannel = ({ socket, request, subscriptions }) => {
-    const systemChannelId = `timeline:access_token:${request.accessTokenId}`;
+    const accessTokenChannelId = `timeline:access_token:${request.accessTokenId}`;
+    const systemChannelId = `timeline:system:${request.accountId}`;

     const listener = createSystemMessageListener(request, {
@@ -1019,8 +1094,15 @@ const startWorker = async (workerId) => {
     });

+    subscribe(`${redisPrefix}${accessTokenChannelId}`, listener);
     subscribe(`${redisPrefix}${systemChannelId}`, listener);

+    subscriptions[accessTokenChannelId] = {
+      listener,
+      stopHeartbeat: () => {
+      },
+    };
+
     subscriptions[systemChannelId] = {
       listener,
       stopHeartbeat: () => {