0
0
Fork 0

Batched remove status service (#3735)

* Make Pubsubhubbub::DistributionWorker handle both single stream entry
arguments, as well as arrays of stream entries

* Add BatchedRemoveStatusService, make SuspendAccountService use it

* Improve method names

* Add test

* Add more tests

* Use PuSH payloads of 100 to have a clear mapping of
1000 input statuses -> 10 PuSH payloads

It was nice while it lasted
This commit is contained in:
Eugen Rochko 2017-06-14 18:01:35 +02:00 committed by GitHub
parent 4a618908e8
commit e17c2e5da5
5 changed files with 216 additions and 23 deletions

View file

@ -0,0 +1,114 @@
# frozen_string_literal: true
class BatchedRemoveStatusService < BaseService
include StreamEntryRenderer
# Delete given statuses and reblogs of them
# Dispatch PuSH updates of the deleted statuses, but only local ones
# Dispatch Salmon deletes, unique per domain, of the deleted statuses, but only local ones
# Remove statuses from home feeds
# Push delete events to streaming API for home feeds and public feeds
# @param [Status] statuses A preferably batched array of statuses
def call(statuses)
statuses = Status.where(id: statuses.map(&:id)).includes(:account, :stream_entry).flat_map { |status| [status] + status.reblogs.includes(:account, :stream_entry).to_a }
@mentions = statuses.map { |s| [s.id, s.mentions.includes(:account).to_a] }.to_h
@tags = statuses.map { |s| [s.id, s.tags.pluck(:name)] }.to_h
@stream_entry_batches = []
@salmon_batches = []
@json_payloads = statuses.map { |s| [s.id, Oj.dump(event: :delete, payload: s.id)] }.to_h
# Ensure that rendered XML reflects destroyed state
Status.where(id: statuses.map(&:id)).in_batches.destroy_all
# Batch by source account
statuses.group_by(&:account_id).each do |_, account_statuses|
account = account_statuses.first.account
unpush_from_home_timelines(account_statuses)
batch_stream_entries(account_statuses) if account.local?
end
# Cannot be batched
statuses.each do |status|
unpush_from_public_timelines(status)
batch_salmon_slaps(status) if status.local?
end
Pubsubhubbub::DistributionWorker.push_bulk(@stream_entry_batches) { |batch| batch }
NotificationWorker.push_bulk(@salmon_batches) { |batch| batch }
end
private
def batch_stream_entries(statuses)
stream_entry_ids = statuses.map { |s| s.stream_entry.id }
stream_entry_ids.each_slice(100) do |batch_of_stream_entry_ids|
@stream_entry_batches << [batch_of_stream_entry_ids]
end
end
def unpush_from_home_timelines(statuses)
account = statuses.first.account
recipients = account.followers.local.pluck(:id)
recipients << account.id if account.local?
recipients.each do |follower_id|
unpush(follower_id, statuses)
end
end
def unpush_from_public_timelines(status)
payload = @json_payloads[status.id]
redis.pipelined do
redis.publish('timeline:public', payload)
redis.publish('timeline:public:local', payload) if status.local?
@tags[status.id].each do |hashtag|
redis.publish("timeline:hashtag:#{hashtag}", payload)
redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
end
end
end
def batch_salmon_slaps(status)
return if @mentions[status.id].empty?
payload = stream_entry_to_xml(status.stream_entry.reload)
recipients = @mentions[status.id].map(&:account).reject(&:local?).uniq(&:domain).map(&:id)
recipients.each do |recipient_id|
@salmon_batches << [payload, status.account_id, recipient_id]
end
end
def unpush(follower_id, statuses)
key = FeedManager.instance.key(:home, follower_id)
originals = statuses.reject(&:reblog?)
reblogs = statuses.reject { |s| !s.reblog? }
# Quickly remove all originals
redis.pipelined do
originals.each do |status|
redis.zremrangebyscore(key, status.id, status.id)
redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
end
end
# For reblogs, re-add original status to feed, unless the reblog
# was not in the feed in the first place
reblogs.each do |status|
redis.zadd(key, status.reblog_of_id, status.reblog_of_id) unless redis.zscore(key, status.reblog_of_id).nil?
redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
end
end
def redis
Redis.current
end
end

View file

@ -17,9 +17,8 @@ class SuspendAccountService < BaseService
end
def purge_content
@account.statuses.reorder(nil).find_each do |status|
# This federates out deletes to previous followers
RemoveStatusService.new.call(status)
@account.statuses.reorder(nil).find_in_batches do |statuses|
BatchedRemoveStatusService.new.call(statuses)
end
[

View file

@ -5,25 +5,45 @@ class Pubsubhubbub::DistributionWorker
sidekiq_options queue: 'push'
def perform(stream_entry_id)
stream_entry = StreamEntry.find(stream_entry_id)
def perform(stream_entry_ids)
stream_entries = StreamEntry.where(id: stream_entry_ids).includes(:status).reject { |e| e.status&.direct_visibility? }
return if stream_entry.status&.direct_visibility?
return if stream_entries.empty?
@account = stream_entry.account
@payload = AtomSerializer.render(AtomSerializer.new.feed(@account, [stream_entry]))
@domains = @account.followers_domains
@account = stream_entries.first.account
@subscriptions = active_subscriptions.to_a
Subscription.where(account: @account).active.select('id, callback_url').find_each do |subscription|
next if stream_entry.hidden? && !allowed_to_receive?(subscription.callback_url)
Pubsubhubbub::DeliveryWorker.perform_async(subscription.id, @payload)
end
rescue ActiveRecord::RecordNotFound
true
distribute_public!(stream_entries.reject(&:hidden?))
distribute_hidden!(stream_entries.reject { |s| !s.hidden? })
end
private
def distribute_public!(stream_entries)
return if stream_entries.empty?
@payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions) do |subscription|
[subscription.id, @payload]
end
end
def distribute_hidden!(stream_entries)
return if stream_entries.empty?
@payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
@domains = @account.followers_domains
Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions.reject { |s| !allowed_to_receive?(s.callback_url) }) do |subscription|
[subscription.id, @payload]
end
end
def active_subscriptions
Subscription.where(account: @account).active.select('id, callback_url')
end
def allowed_to_receive?(callback_url)
@domains.include?(Addressable::URI.parse(callback_url).host)
end