From 72423bc8f69ea54faf29eefe6aff32f6d27c217e Mon Sep 17 00:00:00 2001
From: Eugen Rochko
Date: Tue, 8 Aug 2023 09:09:14 +0200
Subject: [PATCH] Change account search tokenizer and queries (#26378)

---
 app/chewy/accounts_index.rb            |   2 +-
 app/services/account_search_service.rb | 231 +++++++++++++++----------
 2 files changed, 144 insertions(+), 89 deletions(-)

Review note: followers_score_function adds 0.0 to its numerator, the same
float coercion reputation_score_function uses, so the ratio is not computed
with integer division (which would truncate it to 0 for integral counts).

diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index abde8e92f1..61f5277d2b 100644
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb
@@ -33,7 +33,7 @@ class AccountsIndex < Chewy::Index
       },
 
       verbatim: {
-        tokenizer: 'whitespace',
+        tokenizer: 'standard',
         filter: %w(lowercase asciifolding cjk_width),
       },
 
diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb
index b732fbcec3..a15b691211 100644
--- a/app/services/account_search_service.rb
+++ b/app/services/account_search_service.rb
@@ -8,6 +8,143 @@ class AccountSearchService < BaseService
   # Min. number of characters to look for non-exact matches
   MIN_QUERY_LENGTH = 5
 
+  class QueryBuilder
+    def initialize(query, account, options = {})
+      @query = query
+      @account = account
+      @options = options
+    end
+
+    def build
+      AccountsIndex.query(
+        bool: {
+          must: {
+            function_score: {
+              query: {
+                bool: {
+                  must: must_clauses,
+                },
+              },
+
+              functions: [
+                reputation_score_function,
+                followers_score_function,
+                time_distance_function,
+              ],
+            },
+          },
+
+          should: should_clauses,
+        }
+      )
+    end
+
+    private
+
+    def must_clauses
+      if @account && @options[:following]
+        [core_query, only_following_query]
+      else
+        [core_query]
+      end
+    end
+
+    def should_clauses
+      if @account && !@options[:following]
+        [boost_following_query]
+      else
+        []
+      end
+    end
+
+    # This function limits results to only the accounts the user is following
+    def only_following_query
+      {
+        terms: {
+          id: following_ids,
+        },
+      }
+    end
+
+    # This function promotes accounts the user is following
+    def boost_following_query
+      {
+        terms: {
+          id: following_ids,
+          boost: 100,
+        },
+      }
+    end
+
+    # This function deranks accounts that follow more people than follow them
+    def reputation_score_function
+      {
+        script_score: {
+          script: {
+            source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
+          },
+        },
+      }
+    end
+
+    # This function promotes accounts that have more followers (+ 0.0 forces float division)
+    def followers_score_function
+      {
+        script_score: {
+          script: {
+            source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + 1)",
+          },
+        },
+      }
+    end
+
+    # This function deranks accounts that haven't posted in a long time
+    def time_distance_function
+      {
+        gauss: {
+          last_status_at: {
+            scale: '30d',
+            offset: '30d',
+            decay: 0.3,
+          },
+        },
+      }
+    end
+
+    def following_ids
+      @following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
+    end
+  end
+
+  class AutocompleteQueryBuilder < QueryBuilder
+    private
+
+    def core_query
+      {
+        multi_match: {
+          query: @query,
+          type: 'bool_prefix',
+          fields: %w(username username.* display_name display_name.*),
+        },
+      }
+    end
+  end
+
+  class FullQueryBuilder < QueryBuilder
+    private
+
+    def core_query
+      {
+        multi_match: {
+          query: @query,
+          type: 'most_fields',
+          fields: %w(username^2 display_name^2 text text.*),
+          operator: 'and',
+        },
+      }
+    end
+  end
+
   def call(query, account = nil, options = {})
     @query = query&.strip&.gsub(/\A@/, '')
     @limit = options[:limit].to_i
@@ -71,27 +208,15 @@ class AccountSearchService < BaseService
   end
 
   def from_elasticsearch
-    must_clauses = must_clause
-    should_clauses = should_clause
-
-    if account
-      return [] if options[:following] && following_ids.empty?
-
-      if options[:following]
-        must_clauses << { terms: { id: following_ids } }
-      elsif following_ids.any?
-        should_clauses << { terms: { id: following_ids, boost: 100 } }
+    query_builder = begin
+      if options[:use_searchable_text]
+        FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
+      else
+        AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
       end
     end
 
-    query = { bool: { must: must_clauses, should: should_clauses } }
-    functions = [reputation_score_function, followers_score_function, time_distance_function]
-
-    records = AccountsIndex.query(function_score: { query: query, functions: functions })
-                           .limit(limit_for_non_exact_results)
-                           .offset(offset)
-                           .objects
-                           .compact
+    records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact
 
     ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)
 
@@ -100,76 +225,6 @@ class AccountSearchService < BaseService
     nil
   end
 
-  def reputation_score_function
-    {
-      script_score: {
-        script: {
-          source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
-        },
-      },
-    }
-  end
-
-  def followers_score_function
-    {
-      script_score: {
-        script: {
-          source: "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)",
-        },
-      },
-    }
-  end
-
-  def time_distance_function
-    {
-      gauss: {
-        last_status_at: {
-          scale: '30d',
-          offset: '30d',
-          decay: 0.3,
-        },
-      },
-    }
-  end
-
-  def must_clause
-    if options[:start_with_hashtag]
-      fields = %w(text text.*)
-    else
-      fields = %w(username username.* display_name display_name.*)
-      fields << 'text' << 'text.*' if options[:use_searchable_text]
-    end
-
-    [
-      {
-        multi_match: {
-          query: terms_for_query,
-          fields: fields,
-          type: 'best_fields',
-          operator: 'or',
-        },
-      },
-    ]
-  end
-
-  def should_clause
-    [
-      {
-        multi_match: {
-          query: terms_for_query,
-          fields: %w(username username.* display_name display_name.*),
-          type: 'best_fields',
-          operator: 'and',
-          boost: 10,
-        },
-      },
-    ]
-  end
-
-  def following_ids
-    @following_ids ||= account.active_relationships.pluck(:target_account_id) + [account.id]
-  end
-
   def limit_for_non_exact_results
     return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH
 