0
0
Fork 0

Refactor ActivityPub::FetchRepliesService and ActivityPub::FetchAllRepliesService (#34149)

This commit is contained in:
Claire 2025-03-12 12:52:38 +01:00 committed by GitHub
parent 9db26db495
commit 966b816382
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 95 additions and 81 deletions

View file

@ -338,7 +338,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
collection = @object['replies'] collection = @object['replies']
return if collection.blank? return if collection.blank?
replies = ActivityPub::FetchRepliesService.new.call(status, collection, allow_synchronous_requests: false, request_id: @options[:request_id]) replies = ActivityPub::FetchRepliesService.new.call(status.account.uri, collection, allow_synchronous_requests: false, request_id: @options[:request_id])
return unless replies.nil? return unless replies.nil?
uri = value_or_id(collection) uri = value_or_id(collection)

View file

@ -6,25 +6,15 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
# Limit of replies to fetch per status # Limit of replies to fetch per status
MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_SINGLE'] || 500).to_i MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_SINGLE'] || 500).to_i
def call(collection_or_uri, status_uri, max_pages = nil, request_id: nil) def call(status_uri, collection_or_uri, max_pages: 1, request_id: nil)
@allow_synchronous_requests = true
@collection_or_uri = collection_or_uri
@status_uri = status_uri @status_uri = status_uri
@items, n_pages = collection_items(collection_or_uri, max_pages) super
@items = filtered_replies
return if @items.nil?
FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
[@items, n_pages]
end end
private private
def filtered_replies def filter_replies(items)
return if @items.nil?
# Find all statuses that we *shouldn't* update the replies for, and use that as a filter. # Find all statuses that we *shouldn't* update the replies for, and use that as a filter.
# We don't assume that we have the statuses before they're created, # We don't assume that we have the statuses before they're created,
# hence the negative filter - # hence the negative filter -
@ -34,7 +24,7 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
# #
# Typically we assume the number of replies we *shouldn't* fetch is smaller than the # Typically we assume the number of replies we *shouldn't* fetch is smaller than the
# replies we *should* fetch, so we also minimize the number of uris we should load here. # replies we *should* fetch, so we also minimize the number of uris we should load here.
uris = @items.map { |item| value_or_id(item) } uris = items.map { |item| value_or_id(item) }
# Expand collection to get replies in the DB that were # Expand collection to get replies in the DB that were
# - not included in the collection, # - not included in the collection,
@ -61,8 +51,4 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
Rails.logger.debug { "FetchAllRepliesService - #{@collection_or_uri}: Fetching filtered statuses: #{uris}" } Rails.logger.debug { "FetchAllRepliesService - #{@collection_or_uri}: Fetching filtered statuses: #{uris}" }
uris uris
end end
def filter_by_host?
false
end
end end

View file

@ -6,53 +6,56 @@ class ActivityPub::FetchRepliesService < BaseService
# Limit of fetched replies # Limit of fetched replies
MAX_REPLIES = 5 MAX_REPLIES = 5
def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil) def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil)
@account = parent_status.account @reference_uri = reference_uri
@allow_synchronous_requests = allow_synchronous_requests @allow_synchronous_requests = allow_synchronous_requests
@items, = collection_items(collection_or_uri) @items, n_pages = collection_items(collection_or_uri, max_pages: max_pages)
return if @items.nil? return if @items.nil?
FetchReplyWorker.push_bulk(filtered_replies) { |reply_uri| [reply_uri, { 'request_id' => request_id }] } @items = filter_replies(@items)
FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
@items [@items, n_pages]
end end
private private
def collection_items(collection_or_uri, max_pages = nil) def collection_items(collection_or_uri, max_pages: 1)
collection = fetch_collection(collection_or_uri) collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash) return unless collection.is_a?(Hash)
collection = fetch_collection(collection['first']) if collection['first'].present? collection = fetch_collection(collection['first']) if collection['first'].present?
return unless collection.is_a?(Hash) return unless collection.is_a?(Hash)
all_items = [] items = []
n_pages = 1 n_pages = 1
while collection.is_a?(Hash) while collection.is_a?(Hash)
items = case collection['type'] items.concat(as_array(collection_page_items(collection)))
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
all_items.concat(as_array(items)) break if items.size >= MAX_REPLIES
break if n_pages >= max_pages
break if all_items.size >= MAX_REPLIES
break if !max_pages.nil? && n_pages >= max_pages
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
n_pages += 1 n_pages += 1
end end
[all_items, n_pages] [items, n_pages]
end
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end end
def fetch_collection(collection_or_uri) def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash) return collection_or_uri if collection_or_uri.is_a?(Hash)
return unless @allow_synchronous_requests return unless @allow_synchronous_requests
return if filter_by_host? && non_matching_uri_hosts?(@account.uri, collection_or_uri) return if non_matching_uri_hosts?(@reference_uri, collection_or_uri)
# NOTE: For backward compatibility reasons, Mastodon signs outgoing # NOTE: For backward compatibility reasons, Mastodon signs outgoing
# queries incorrectly by default. # queries incorrectly by default.
@ -70,20 +73,11 @@ class ActivityPub::FetchRepliesService < BaseService
end end
end end
def filtered_replies def filter_replies(items)
if filter_by_host? # Only fetch replies to the same server as the original status to avoid
# Only fetch replies to the same server as the original status to avoid # amplification attacks.
# amplification attacks.
# Also limit to 5 fetched replies to limit potential for DoS. # Also limit to 5 fetched replies to limit potential for DoS.
@items.map { |item| value_or_id(item) }.reject { |uri| non_matching_uri_hosts?(@account.uri, uri) }.take(MAX_REPLIES) items.map { |item| value_or_id(item) }.reject { |uri| non_matching_uri_hosts?(@reference_uri, uri) }.take(MAX_REPLIES)
else
@items.map { |item| value_or_id(item) }.take(MAX_REPLIES)
end
end
# Whether replies with a different domain than the replied_to post should be rejected
def filter_by_host?
true
end end
end end

View file

@ -51,7 +51,7 @@ class ActivityPub::FetchAllRepliesWorker
replies_collection_or_uri = get_replies_uri(status_uri) replies_collection_or_uri = get_replies_uri(status_uri)
return if replies_collection_or_uri.nil? return if replies_collection_or_uri.nil?
ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, status_uri, max_pages, **options.deep_symbolize_keys) ActivityPub::FetchAllRepliesService.new.call(status_uri, replies_collection_or_uri, max_pages: max_pages, **options.deep_symbolize_keys)
end end
def get_replies_uri(parent_status_uri) def get_replies_uri(parent_status_uri)

View file

@ -7,7 +7,7 @@ class ActivityPub::FetchRepliesWorker
sidekiq_options queue: 'pull', retry: 3 sidekiq_options queue: 'pull', retry: 3
def perform(parent_status_id, replies_uri, options = {}) def perform(parent_status_id, replies_uri, options = {})
ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id), replies_uri, **options.deep_symbolize_keys) ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id).account.uri, replies_uri, **options.deep_symbolize_keys)
rescue ActiveRecord::RecordNotFound rescue ActiveRecord::RecordNotFound
true true
end end

View file

@ -10,17 +10,17 @@ RSpec.describe ActivityPub::FetchAllRepliesService do
let(:collection_uri) { 'http://example.com/replies/1' } let(:collection_uri) { 'http://example.com/replies/1' }
let(:items) do let(:items) do
[ %w(
'http://example.com/self-reply-1', http://example.com/self-reply-1
'http://example.com/self-reply-2', http://example.com/self-reply-2
'http://example.com/self-reply-3', http://example.com/self-reply-3
'http://other.com/other-reply-1', http://other.com/other-reply-1
'http://other.com/other-reply-2', http://other.com/other-reply-2
'http://other.com/other-reply-3', http://other.com/other-reply-3
'http://example.com/self-reply-4', http://example.com/self-reply-4
'http://example.com/self-reply-5', http://example.com/self-reply-5
'http://example.com/self-reply-6', http://example.com/self-reply-6
] )
end end
let(:payload) do let(:payload) do
@ -36,10 +36,21 @@ RSpec.describe ActivityPub::FetchAllRepliesService do
it 'fetches more than the default maximum and from multiple domains' do it 'fetches more than the default maximum and from multiple domains' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(payload, status.uri) subject.call(status.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(%w(http://example.com/self-reply-1 http://example.com/self-reply-2 http://example.com/self-reply-3 http://other.com/other-reply-1 http://other.com/other-reply-2 http://other.com/other-reply-3 http://example.com/self-reply-4 expect(FetchReplyWorker).to have_received(:push_bulk).with(
http://example.com/self-reply-5 http://example.com/self-reply-6)) %w(
http://example.com/self-reply-1
http://example.com/self-reply-2
http://example.com/self-reply-3
http://other.com/other-reply-1
http://other.com/other-reply-2
http://other.com/other-reply-3
http://example.com/self-reply-4
http://example.com/self-reply-5
http://example.com/self-reply-6
)
)
end end
context 'with a recent status' do context 'with a recent status' do
@ -50,9 +61,20 @@ RSpec.describe ActivityPub::FetchAllRepliesService do
it 'skips statuses that have been updated recently' do it 'skips statuses that have been updated recently' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(payload, status.uri) subject.call(status.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(%w(http://example.com/self-reply-1 http://example.com/self-reply-3 http://other.com/other-reply-1 http://other.com/other-reply-2 http://other.com/other-reply-3 http://example.com/self-reply-4 http://example.com/self-reply-5 http://example.com/self-reply-6)) expect(FetchReplyWorker).to have_received(:push_bulk).with(
%w(
http://example.com/self-reply-1
http://example.com/self-reply-3
http://other.com/other-reply-1
http://other.com/other-reply-2
http://other.com/other-reply-3
http://example.com/self-reply-4
http://example.com/self-reply-5
http://example.com/self-reply-6
)
)
end end
end end
@ -64,7 +86,7 @@ RSpec.describe ActivityPub::FetchAllRepliesService do
it 'updates the time that fetched statuses were last fetched' do it 'updates the time that fetched statuses were last fetched' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(payload, status.uri) subject.call(status.uri, payload)
expect(Status.find_by(uri: 'http://other.com/other-reply-1').fetched_replies_at).to be >= 1.minute.ago expect(Status.find_by(uri: 'http://other.com/other-reply-1').fetched_replies_at).to be >= 1.minute.ago
end end
@ -80,10 +102,22 @@ RSpec.describe ActivityPub::FetchAllRepliesService do
it 'updates the unsubscribed replies' do it 'updates the unsubscribed replies' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(payload, status.uri) subject.call(status.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(%w(http://example.com/self-reply-1 http://example.com/self-reply-2 http://example.com/self-reply-3 http://other.com/other-reply-1 http://other.com/other-reply-2 http://other.com/other-reply-3 http://example.com/self-reply-4 expect(FetchReplyWorker).to have_received(:push_bulk).with(
http://example.com/self-reply-5 http://example.com/self-reply-6 http://other.com/account/unsubscribed)) %w(
http://example.com/self-reply-1
http://example.com/self-reply-2
http://example.com/self-reply-3
http://other.com/other-reply-1
http://other.com/other-reply-2
http://other.com/other-reply-3
http://example.com/self-reply-4
http://example.com/self-reply-5
http://example.com/self-reply-6
http://other.com/account/unsubscribed
)
)
end end
end end
end end

View file

@ -40,7 +40,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'queues the expected worker' do it 'queues the expected worker' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload) subject.call(status.account.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1'])
end end
@ -50,7 +50,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'spawns workers for up to 5 replies on the same server' do it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload) subject.call(status.account.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end end
@ -64,7 +64,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'spawns workers for up to 5 replies on the same server' do it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, collection_uri) subject.call(status.account.uri, collection_uri)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end end
@ -85,7 +85,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'spawns workers for up to 5 replies on the same server' do it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload) subject.call(status.account.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end end
@ -99,7 +99,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'spawns workers for up to 5 replies on the same server' do it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, collection_uri) subject.call(status.account.uri, collection_uri)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end end
@ -124,7 +124,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'spawns workers for up to 5 replies on the same server' do it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, payload) subject.call(status.account.uri, payload)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end end
@ -138,7 +138,7 @@ RSpec.describe ActivityPub::FetchRepliesService do
it 'spawns workers for up to 5 replies on the same server' do it 'spawns workers for up to 5 replies on the same server' do
allow(FetchReplyWorker).to receive(:push_bulk) allow(FetchReplyWorker).to receive(:push_bulk)
subject.call(status, collection_uri) subject.call(status.account.uri, collection_uri)
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
end end