Refactor ActivityPub::FetchRepliesService
and ActivityPub::FetchAllRepliesService
(#34149)
This commit is contained in:
parent
9db26db495
commit
966b816382
7 changed files with 95 additions and 81 deletions
|
@ -338,7 +338,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
|
|||
collection = @object['replies']
|
||||
return if collection.blank?
|
||||
|
||||
replies = ActivityPub::FetchRepliesService.new.call(status, collection, allow_synchronous_requests: false, request_id: @options[:request_id])
|
||||
replies = ActivityPub::FetchRepliesService.new.call(status.account.uri, collection, allow_synchronous_requests: false, request_id: @options[:request_id])
|
||||
return unless replies.nil?
|
||||
|
||||
uri = value_or_id(collection)
|
||||
|
|
|
@ -6,25 +6,15 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
|
|||
# Limit of replies to fetch per status
|
||||
MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_SINGLE'] || 500).to_i
|
||||
|
||||
def call(collection_or_uri, status_uri, max_pages = nil, request_id: nil)
|
||||
@allow_synchronous_requests = true
|
||||
@collection_or_uri = collection_or_uri
|
||||
def call(status_uri, collection_or_uri, max_pages: 1, request_id: nil)
|
||||
@status_uri = status_uri
|
||||
|
||||
@items, n_pages = collection_items(collection_or_uri, max_pages)
|
||||
@items = filtered_replies
|
||||
return if @items.nil?
|
||||
|
||||
FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
|
||||
|
||||
[@items, n_pages]
|
||||
super
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def filtered_replies
|
||||
return if @items.nil?
|
||||
|
||||
def filter_replies(items)
|
||||
# Find all statuses that we *shouldn't* update the replies for, and use that as a filter.
|
||||
# We don't assume that we have the statuses before they're created,
|
||||
# hence the negative filter -
|
||||
|
@ -34,7 +24,7 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
|
|||
#
|
||||
# Typically we assume the number of replies we *shouldn't* fetch is smaller than the
|
||||
# replies we *should* fetch, so we also minimize the number of uris we should load here.
|
||||
uris = @items.map { |item| value_or_id(item) }
|
||||
uris = items.map { |item| value_or_id(item) }
|
||||
|
||||
# Expand collection to get replies in the DB that were
|
||||
# - not included in the collection,
|
||||
|
@ -61,8 +51,4 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
|
|||
Rails.logger.debug { "FetchAllRepliesService - #{@collection_or_uri}: Fetching filtered statuses: #{uris}" }
|
||||
uris
|
||||
end
|
||||
|
||||
def filter_by_host?
|
||||
false
|
||||
end
|
||||
end
|
||||
|
|
|
@ -6,53 +6,56 @@ class ActivityPub::FetchRepliesService < BaseService
|
|||
# Limit of fetched replies
|
||||
MAX_REPLIES = 5
|
||||
|
||||
def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil)
|
||||
@account = parent_status.account
|
||||
def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil)
|
||||
@reference_uri = reference_uri
|
||||
@allow_synchronous_requests = allow_synchronous_requests
|
||||
|
||||
@items, = collection_items(collection_or_uri)
|
||||
@items, n_pages = collection_items(collection_or_uri, max_pages: max_pages)
|
||||
return if @items.nil?
|
||||
|
||||
FetchReplyWorker.push_bulk(filtered_replies) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
|
||||
@items = filter_replies(@items)
|
||||
FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
|
||||
|
||||
@items
|
||||
[@items, n_pages]
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def collection_items(collection_or_uri, max_pages = nil)
|
||||
def collection_items(collection_or_uri, max_pages: 1)
|
||||
collection = fetch_collection(collection_or_uri)
|
||||
return unless collection.is_a?(Hash)
|
||||
|
||||
collection = fetch_collection(collection['first']) if collection['first'].present?
|
||||
return unless collection.is_a?(Hash)
|
||||
|
||||
all_items = []
|
||||
items = []
|
||||
n_pages = 1
|
||||
while collection.is_a?(Hash)
|
||||
items = case collection['type']
|
||||
when 'Collection', 'CollectionPage'
|
||||
collection['items']
|
||||
when 'OrderedCollection', 'OrderedCollectionPage'
|
||||
collection['orderedItems']
|
||||
end
|
||||
items.concat(as_array(collection_page_items(collection)))
|
||||
|
||||
all_items.concat(as_array(items))
|
||||
|
||||
break if all_items.size >= MAX_REPLIES
|
||||
break if !max_pages.nil? && n_pages >= max_pages
|
||||
break if items.size >= MAX_REPLIES
|
||||
break if n_pages >= max_pages
|
||||
|
||||
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
|
||||
n_pages += 1
|
||||
end
|
||||
|
||||
[all_items, n_pages]
|
||||
[items, n_pages]
|
||||
end
|
||||
|
||||
def collection_page_items(collection)
|
||||
case collection['type']
|
||||
when 'Collection', 'CollectionPage'
|
||||
collection['items']
|
||||
when 'OrderedCollection', 'OrderedCollectionPage'
|
||||
collection['orderedItems']
|
||||
end
|
||||
end
|
||||
|
||||
def fetch_collection(collection_or_uri)
|
||||
return collection_or_uri if collection_or_uri.is_a?(Hash)
|
||||
return unless @allow_synchronous_requests
|
||||
return if filter_by_host? && non_matching_uri_hosts?(@account.uri, collection_or_uri)
|
||||
return if non_matching_uri_hosts?(@reference_uri, collection_or_uri)
|
||||
|
||||
# NOTE: For backward compatibility reasons, Mastodon signs outgoing
|
||||
# queries incorrectly by default.
|
||||
|
@ -70,20 +73,11 @@ class ActivityPub::FetchRepliesService < BaseService
|
|||
end
|
||||
end
|
||||
|
||||
def filtered_replies
|
||||
if filter_by_host?
|
||||
# Only fetch replies to the same server as the original status to avoid
|
||||
# amplification attacks.
|
||||
def filter_replies(items)
|
||||
# Only fetch replies to the same server as the original status to avoid
|
||||
# amplification attacks.
|
||||
|
||||
# Also limit to 5 fetched replies to limit potential for DoS.
|
||||
@items.map { |item| value_or_id(item) }.reject { |uri| non_matching_uri_hosts?(@account.uri, uri) }.take(MAX_REPLIES)
|
||||
else
|
||||
@items.map { |item| value_or_id(item) }.take(MAX_REPLIES)
|
||||
end
|
||||
end
|
||||
|
||||
# Whether replies with a different domain than the replied_to post should be rejected
|
||||
def filter_by_host?
|
||||
true
|
||||
# Also limit to 5 fetched replies to limit potential for DoS.
|
||||
items.map { |item| value_or_id(item) }.reject { |uri| non_matching_uri_hosts?(@reference_uri, uri) }.take(MAX_REPLIES)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -51,7 +51,7 @@ class ActivityPub::FetchAllRepliesWorker
|
|||
replies_collection_or_uri = get_replies_uri(status_uri)
|
||||
return if replies_collection_or_uri.nil?
|
||||
|
||||
ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, status_uri, max_pages, **options.deep_symbolize_keys)
|
||||
ActivityPub::FetchAllRepliesService.new.call(status_uri, replies_collection_or_uri, max_pages: max_pages, **options.deep_symbolize_keys)
|
||||
end
|
||||
|
||||
def get_replies_uri(parent_status_uri)
|
||||
|
|
|
@ -7,7 +7,7 @@ class ActivityPub::FetchRepliesWorker
|
|||
sidekiq_options queue: 'pull', retry: 3
|
||||
|
||||
def perform(parent_status_id, replies_uri, options = {})
|
||||
ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id), replies_uri, **options.deep_symbolize_keys)
|
||||
ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id).account.uri, replies_uri, **options.deep_symbolize_keys)
|
||||
rescue ActiveRecord::RecordNotFound
|
||||
true
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue