2022-02-25 08:34:14 +09:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
class Trends::Statuses < Trends::Base
|
|
|
|
# Value returned by #key_prefix and handed to the Query built in #query.
PREFIX = 'trending_statuses'

# Number of statuses loaded per batch by #refresh.
BATCH_SIZE = 100

self.default_options = {
  # Minimum reblogs + favourites a status needs before it gets a non-zero score.
  threshold: 5,
  # Passed to StatusTrend.ranked_below in #request_review to find the score
  # that not-yet-allowed trends must exceed before a review is requested.
  review_threshold: 3,
  # Divisor of the status age in the exponential decay applied to scores.
  score_halflife: 1.hour.freeze,
  # Decayed scores below this value cause the StatusTrend row to be deleted.
  decay_threshold: 0.3,
}
|
|
|
|
|
|
|
|
class Query < Trends::Query
|
|
|
|
# Stores +account+ on this query (mutating it) so that #to_arel and
# #preferred_languages take the account into consideration.
#
# @param account [Object, nil] the account to filter for
# @return [self] the same query, to allow chaining
def filtered_for!(account)
  @account = account
  self
end
|
|
|
|
|
|
|
|
# Non-mutating counterpart of #filtered_for!: applies the account to a clone
# of this query and leaves the receiver untouched.
#
# @param account [Object, nil] the account to filter for
# @return [Object] whatever #filtered_for! returns when called on the clone
def filtered_for(account)
  clone.filtered_for!(account)
end
|
|
|
|
|
2022-10-08 23:45:40 +09:00
|
|
|
# Builds the relation of trending statuses for this query.
#
# Statuses are joined to their trend and ordered by score, highest first.
# When #preferred_languages is present, statuses in one of those languages
# are ordered ahead of the others. The remaining filters are applied only
# when the matching instance variable is set (@allowed, @offset and @limit
# are not assigned in this class — presumably by Trends::Query; confirm).
#
# @return [Object] the relation produced by the chained scope calls
def to_arel
  scope = Status.joins(:trend).reorder(score: :desc)
  scope = scope.reorder(language_order_clause.desc, score: :desc) if preferred_languages.present?
  scope = scope.merge(StatusTrend.allowed) if @allowed
  scope = scope.not_excluded_by_account(@account).not_domain_blocked_by_account(@account) if @account.present?
  scope = scope.offset(@offset) if @offset.present?
  scope = scope.limit(@limit) if @limit.present?
  scope
end
|
|
|
|
|
2022-02-25 08:34:14 +09:00
|
|
|
private
|
|
|
|
|
2022-10-08 23:45:40 +09:00
|
|
|
# Builds a SQL CASE node evaluating to 1 when the trend's language is one of
# #preferred_languages and to 0 otherwise. #to_arel sorts on it descending.
#
# @return [Arel::Nodes::Case]
def language_order_clause
  in_preferred_language = StatusTrend.arel_table[:language].in(preferred_languages)

  Arel::Nodes::Case.new.when(in_preferred_language).then(1).else(0)
end
|
|
|
|
|
|
|
|
# Languages to rank first: the account's chosen languages when an account is
# set and has any, otherwise @locale (not assigned in this class).
#
# @return [Object, nil] the account's chosen languages, or @locale
def preferred_languages
  # Read the attribute once; `nil.present?` is false, so a missing account
  # falls through to the locale.
  chosen = @account&.chosen_languages
  chosen.present? ? chosen : @locale
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Records a use of +status+ for trend calculation, provided that
# +status.proper+ passes #eligible?.
#
# @param status [Object] responds to #proper and #account_id
# @param at_time [Time] time of the interaction, defaults to now (UTC)
# @return [Object, nil] the result of #add, or nil when not eligible
def register(status, at_time = Time.now.utc)
  # NOTE(review): #proper presumably resolves a reblog to the reblogged
  # status — confirm against Status#proper.
  original = status.proper
  add(original, status.account_id, at_time) if eligible?(original)
end
|
|
|
|
|
|
|
|
# Records that +status+ was used at +at_time+ by delegating to
# #record_used_id (not defined in this class).
#
# @param status [Object] responds to #id
# @param _account_id [Object] accepted but ignored by this implementation
# @param at_time [Time] time of the interaction, defaults to now (UTC)
# @return [Object] whatever #record_used_id returns
def add(status, _account_id, at_time = Time.now.utc)
  record_used_id(status.id, at_time)
end
|
|
|
|
|
|
|
|
# Creates a fresh Query over this trend type.
#
# @return [Query] a query built from #key_prefix and #klass
def query
  Query.new(key_prefix, klass)
end
|
|
|
|
|
|
|
|
# Recomputes the scores of all candidate statuses and then their ranks.
#
# @param at_time [Time] reference time handed to #calculate_scores and
#   #recently_used_ids, defaults to now (UTC)
# @return [Object] whatever StatusTrend.recalculate_ordered_rank returns
def refresh(at_time = Time.now.utc)
  # Both passes run the same batched pipeline; only the source of IDs differs.
  rescore = lambda do |status_ids|
    Status.where(id: status_ids).includes(:status_stat, :account).reorder(nil).find_in_batches(batch_size: BATCH_SIZE) do |statuses|
      calculate_scores(statuses, at_time)
    end
  end

  # First, recalculate scores for statuses that were trending previously. We split the queries
  # to avoid having to load all of the IDs into Ruby just to send them back into Postgres
  rescore.call(StatusTrend.select(:status_id))

  # Then, calculate scores for statuses that were used today. There are potentially some
  # duplicate items here that we might process one more time, but that should be fine
  rescore.call(recently_used_ids(at_time))

  # Now that all trends have up-to-date scores, and all the ones below the threshold have
  # been removed, we can recalculate their positions
  StatusTrend.recalculate_ordered_rank
end
|
|
|
|
|
|
|
|
# Finds statuses that trend without having been allowed and flags their
# accounts for review.
#
# For every locale in StatusTrend.locales, the reference score is that of
# the first allowed trend returned by `by_rank.ranked_below(review_threshold)`
# (0 when there is none). A not-yet-allowed trend is selected when it scores
# above that reference, its status is not trendable and the status requires
# a review notification.
#
# @return [Array] the selected statuses across all locales; each selected
#   status' account has had `requested_review_at` touched
def request_review
  StatusTrend.locales.flat_map do |language|
    score_at_threshold = StatusTrend.where(language: language, allowed: true).by_rank.ranked_below(options[:review_threshold]).first&.score || 0
    status_trends = StatusTrend.where(language: language, allowed: false).joins(:status).includes(status: :account)

    status_trends.filter_map do |trend|
      status = trend.status
      next unless trend.score > score_at_threshold && !status.trendable? && status.requires_review_notification?

      status.account.touch(:requested_review_at)
      status
    end
  end
end
|
|
|
|
|
|
|
|
protected
|
|
|
|
|
|
|
|
# @return [String] the PREFIX constant, used as the key prefix for this trend type
def key_prefix
  PREFIX
end
|
|
|
|
|
|
|
|
# @return [Class] the Status class, i.e. the kind of record this trend type tracks
def klass
  Status
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Whether +status+ may take part in trends. Every condition must hold: the
# status is publicly visible, its account is discoverable and neither
# silenced nor sensitized, it has no spoiler text, is not marked sensitive,
# is not a reply, and its language passes #valid_locale?.
#
# @param status [Object] the status to check
# @return [Boolean, nil] truthy when eligible; the falsy value of the first
#   failing condition otherwise
def eligible?(status)
  # The account is only read after the visibility check has passed.
  status.public_visibility? &&
    status.account.discoverable? &&
    !status.account.silenced? &&
    !status.account.sensitized? &&
    status.spoiler_text.blank? &&
    !status.sensitive? &&
    !status.reply? &&
    valid_locale?(status.language)
end
|
|
|
|
|
|
|
|
# Scores a batch of statuses and synchronises the StatusTrend table: rows
# whose decayed score reaches options[:decay_threshold] are upserted, all
# others in the batch are deleted.
#
# @param statuses [Array] statuses to score
# @param at_time [Time] reference time for the decay
# @return [Object, nil] the result of the delete when anything was deleted, nil otherwise
def calculate_scores(statuses, at_time)
  items = statuses.map { |status| [decaying_score_for(status, at_time), status] }

  # One pass instead of two complementary filters.
  to_insert, to_delete = items.partition { |(score, _)| score >= options[:decay_threshold] }

  StatusTrend.upsert_all(to_insert.map { |(score, status)| { status_id: status.id, account_id: status.account_id, score: score, language: status.language, allowed: status.trendable? || false } }, unique_by: :status_id) if to_insert.any?
  StatusTrend.where(status_id: to_delete.map { |(_, status)| status.id }).delete_all if to_delete.any?
end

# Computes the time-decayed score of a single status.
#
# The raw score is (observed - expected)^2 / expected, where observed is
# reblogs + favourites and expected is fixed at 1.0. It is 0 when observed is
# below expected or below options[:threshold], and also 0 when the status is
# not #eligible?. Otherwise it is halved for every options[:score_halflife]
# elapsed between the status' creation and +at_time+.
#
# @param status [Object] the status to score
# @param at_time [Time] reference time for the decay
# @return [Numeric] 0 or the decayed score
def decaying_score_for(status, at_time)
  expected = 1.0
  observed = (status.reblogs_count + status.favourites_count).to_f
  return 0 if expected > observed || observed < options[:threshold]

  score = ((observed - expected)**2) / expected
  return 0 if score.zero? || !eligible?(status)

  score * (0.5**((at_time.to_f - status.created_at.to_f) / options[:score_halflife].to_f))
end
|
2022-02-25 08:34:14 +09:00
|
|
|
end
|