0
0
Fork 0

Add more accurate account search (#11537)

* Add more accurate account search

When ElasticSearch is available, a more accurate search is implemented:

- Using edge n-gram index for acct and display name
- Using asciifolding and cjk width normalization on display names
- Using Gaussian decay on account activity for additional scoring (recency)
- Using followers/friends ratio for additional scoring (spamminess)
- Using followers number for additional scoring (size)

The exact match precedence only takes effect when the input conforms
to the username format and the username part of it is complete, i.e.
when the user started typing the domain part.

* Support single-letter usernames

* Fix tests

* Fix not picking up account updates

* Add weights and normalization for scores, skip zero terms queries

* Use local counts for accounts index, adjust search parameters

* Fix mistakes

* Using updated_at of accounts is inadequate for remote accounts
This commit is contained in:
Eugen Rochko 2019-08-16 01:24:03 +02:00 committed by GitHub
parent 2ca6b2bb6c
commit 8fdff2748f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 201 additions and 168 deletions

View file

@ -4,47 +4,150 @@ class AccountSearchService < BaseService
attr_reader :query, :limit, :offset, :options, :account
# Entry point of the service: stores the search parameters on the instance
# and returns the results of search_service_results with nils and duplicates
# removed.
#
# query   - search string; it is stripped and one leading "@" is removed
# account - optional account on whose behalf the search runs (default nil)
# options - hash; :limit and :offset are read here and coerced with to_i
#
# NOTE(review): every assignment below is listed twice and
# search_service_results is invoked on two consecutive lines. Presumably these
# are the removed and added sides of the diff hunk rendered without +/-
# markers -- confirm against the real file before relying on statement order.
def call(query, account = nil, options = {})
@query = query.strip
@limit = options[:limit].to_i
@offset = options[:offset].to_i
@options = options
@account = account
# Remember whether the raw (unstripped) argument began with "@".
@acct_hint = query.start_with?('@')
# Strip surrounding whitespace, then drop a single leading "@".
@query = query.strip.gsub(/\A@/, '')
@limit = options[:limit].to_i
@offset = options[:offset].to_i
@options = options
@account = account
search_service_results
# Drop nil entries and duplicate entries from the combined result list.
search_service_results.compact.uniq
end
private
def search_service_results
return [] if query_blank_or_hashtag? || limit < 1
return [] if query.blank? || limit < 1
if resolving_non_matching_remote_account?
[ResolveAccountService.new.call("#{query_username}@#{query_domain}")].compact
else
search_results_and_exact_match.compact.uniq
[exact_match] + search_results
end
# Looks up the single account that matches the query exactly.
#
# Returns nil without doing any lookup unless this is the first page
# (offset is zero) and username_complete? is truthy. Otherwise the lookup
# runs once and its result (nil included) is cached in @exact_match.
def exact_match
  return nil if !offset.zero? || !username_complete?

  unless defined?(@exact_match)
    @exact_match =
      if options[:resolve]
        # Caller asked for resolution: hand the whole query to the resolver.
        ResolveAccountService.new.call(query)
      elsif domain_is_local?
        Account.find_local(query_username)
      else
        Account.find_remote(query_username, query_domain)
      end
  end

  @exact_match
end
def resolving_non_matching_remote_account?
offset.zero? && options[:resolve] && !exact_match? && !domain_is_local?
# Returns the non-exact search results.
#
# Yields an empty array when limit_for_non_exact_results is zero. Otherwise
# the backend is chosen by Chewy.enabled? (from_elasticsearch when true,
# from_database when not) and the result is cached in @search_results.
def search_results
  if limit_for_non_exact_results.zero?
    []
  else
    @search_results ||= (Chewy.enabled? ? from_elasticsearch : from_database)
  end
end
def search_results_and_exact_match
return search_results.to_a unless offset.zero?
results = [exact_match]
return results if exact_match? && limit == 1
results + search_results.to_a
# Database-backed search: uses advanced_search_results when a searching
# account is present, simple_search_results otherwise.
def from_database
  account ? advanced_search_results : simple_search_results
end
def query_blank_or_hashtag?
query.blank? || query.start_with?('#')
# Delegates to Account.advanced_search_for, passing (in order) the search
# terms, the searching account, the remaining result limit, the :following
# option and the offset.
def advanced_search_results
  search_args = [
    terms_for_query,
    account,
    limit_for_non_exact_results,
    options[:following],
    offset,
  ]

  Account.advanced_search_for(*search_args)
end
# Delegates to Account.search_for with the search terms, the remaining
# result limit and the offset.
def simple_search_results
  search_args = [terms_for_query, limit_for_non_exact_results, offset]

  Account.search_for(*search_args)
end
# Runs the search against AccountsIndex and returns the matching records.
#
# The text match is a best_fields multi_match on `acct` only when the input
# looks like an acct (likely_acct?), otherwise on `acct^2` and `display_name`.
# When a searching account is present:
#   * with options[:following], results are restricted to followed ids
#     (and an empty follow list short-circuits to []);
#   * otherwise followed ids, if any, are added as a boosted `should` clause.
# The match is wrapped in a function_score query combining the three scoring
# functions, and :account_stat is preloaded on the returned records.
def from_elasticsearch
  search_terms = terms_for_query
  match_fields = likely_acct? ? %w(acct) : %w(acct^2 display_name)

  must = [{ multi_match: { query: search_terms, fields: match_fields, type: 'best_fields' } }]
  should = []

  if account
    if options[:following]
      return [] if following_ids.empty?

      must << { terms: { id: following_ids } }
    elsif following_ids.any?
      should << { terms: { id: following_ids, boost: 100 } }
    end
  end

  bool_query = { bool: { must: must, should: should } }
  scoring = [reputation_score_function, followers_score_function, time_distance_function]

  found = AccountsIndex
          .query(function_score: { query: bool_query, functions: scoring, boost_mode: 'multiply', score_mode: 'avg' })
          .limit(limit_for_non_exact_results)
          .offset(offset)
          .objects
          .compact

  ActiveRecord::Associations::Preloader.new.preload(found, :account_stat)
  found
end
# Scoring function: a script_score computing
# followers / (followers + following + 1) from the indexed counts.
def reputation_score_function
  # The "+ 0.0" presumably forces floating-point division -- confirm.
  ratio_script = "(doc['followers_count'].value + 0.0) / (doc['followers_count'].value + doc['following_count'].value + 1)"

  { script_score: { script: { source: ratio_script } } }
end
# Scoring function: a field_value_factor on `followers_count` using the
# `log2p` modifier, with 1 substituted when the field is missing.
def followers_score_function
  factor = { field: 'followers_count', modifier: 'log2p', missing: 1 }

  { field_value_factor: factor }
end
# Scoring function: a gauss decay on `last_status_at` with a 30-day scale,
# a 30-day offset and a decay of 0.3.
def time_distance_function
  decay_settings = { scale: '30d', offset: '30d', decay: 0.3 }

  { gauss: { last_status_at: decay_settings } }
end
# Target account ids plucked from the searching account's
# active_relationships; cached in @following_ids after the first call.
def following_ids
  return @following_ids if @following_ids

  @following_ids = account.active_relationships.pluck(:target_account_id)
end
# Number of result slots left for non-exact matches: one fewer than the
# limit when an exact match exists, the full limit otherwise.
def limit_for_non_exact_results
  exact_match? ? limit - 1 : limit
end
# Text handed to the search backend: only the username part when the
# query's domain is local, the query as-is otherwise.
def terms_for_query
  domain_is_local? ? query_username : query
end
# Memoizes the query split on "@" into its parts.
#
# NOTE(review): two alternative assignments are shown. The first also strips
# a leading "@" before splitting; the second splits the query as-is. Because
# both use ||=, the second should be a no-op once the first has stored an
# array. Presumably these are the removed and added sides of the diff hunk --
# confirm which one the real file contains.
def split_query_string
@split_query_string ||= query.gsub(/\A@/, '').split('@')
@split_query_string ||= query.split('@')
end
def query_username
@ -63,57 +166,15 @@ class AccountSearchService < BaseService
@domain_is_local ||= TagManager.instance.local_domain?(query_domain)
end
# Base collection for lookups: the searching account's `following` when the
# :following option is set and an account is present, else Account.
def search_from
  return Account unless options[:following] && account

  account.following
end
# Whether exact_match produced something that answers true to present?.
def exact_match?
  candidate = exact_match
  candidate.present?
end
def exact_match
return @exact_match if defined?(@exact_match)
@exact_match = begin
if domain_is_local?
search_from.without_suspended.find_local(query_username)
else
search_from.without_suspended.find_remote(query_username, query_domain)
end
end
# Truthy when the query contains "@" and, once prefixed with "@", matches
# Account::MENTION_RE. Returns false when there is no "@" in the query,
# otherwise the result of =~ (match index or nil).
def username_complete?
  return false unless query.include?('@')

  "@#{query}" =~ Account::MENTION_RE
end
# Memoized search results: advanced_search_results when a searching account
# is present, simple_search_results otherwise.
def search_results
  @search_results ||= (account ? advanced_search_results : simple_search_results)
end
# Delegates to Account.advanced_search_for, passing (in order) the search
# terms, the searching account, the remaining result limit, the :following
# option and the offset.
def advanced_search_results
  search_args = [
    terms_for_query,
    account,
    limit_for_non_exact_results,
    options[:following],
    offset,
  ]

  Account.advanced_search_for(*search_args)
end
# Delegates to Account.search_for with the search terms, the remaining
# result limit and the offset.
def simple_search_results
  search_args = [terms_for_query, limit_for_non_exact_results, offset]

  Account.search_for(*search_args)
end
# Number of result slots left for non-exact matches: one fewer than the
# limit when this is the first page (offset zero) and an exact match exists,
# the full limit otherwise.
def limit_for_non_exact_results
  return limit unless offset.zero? && exact_match?

  limit - 1
end
def terms_for_query
if domain_is_local?
query_username
else
"#{query_username} #{query_domain}"
end
# Whether the input should be treated as an acct: the stored @acct_hint when
# it is truthy, otherwise whatever username_complete? returns.
def likely_acct?
  return @acct_hint if @acct_hint

  username_complete?
end
end