Disable language detection for texts shorter than 140 characters (#8010)
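If the input text is blank after preparation (only a mention, only a URL, or empty, as in a media-only post), then use nil as the language, since such a post is OK to show to everyone. Otherwise, when the text is too short for detection or the detection result is unreliable, always fall back to the account's locale and, failing that, to the server's default locale.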
This commit is contained in:
parent
0180037dfb
commit
38e9662d78
2 changed files with 14 additions and 5 deletions
|
@ -3,12 +3,16 @@
|
|||
class LanguageDetector
|
||||
include Singleton
|
||||
|
||||
CHARACTER_THRESHOLD = 140
|
||||
|
||||
def initialize
|
||||
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
|
||||
end
|
||||
|
||||
def detect(text, account)
|
||||
detect_language_code(text) || default_locale(account)
|
||||
input_text = prepare_text(text)
|
||||
return if input_text.blank?
|
||||
detect_language_code(input_text) || default_locale(account)
|
||||
end
|
||||
|
||||
def language_names
|
||||
|
@ -23,8 +27,13 @@ class LanguageDetector
|
|||
simplify_text(text).strip
|
||||
end
|
||||
|
||||
def unreliable_input?(text)
|
||||
text.size < CHARACTER_THRESHOLD
|
||||
end
|
||||
|
||||
def detect_language_code(text)
|
||||
result = @identifier.find_language(prepare_text(text))
|
||||
return if unreliable_input?(text)
|
||||
result = @identifier.find_language(text)
|
||||
iso6391(result.language.to_s).to_sym if result.reliable?
|
||||
end
|
||||
|
||||
|
@ -66,6 +75,6 @@ class LanguageDetector
|
|||
end
|
||||
|
||||
def default_locale(account)
|
||||
account.user_locale&.to_sym
|
||||
account.user_locale&.to_sym || I18n.default_locale
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue