0
0
Fork 0

Change how hashtags are normalized (#18795)

* Change how hashtags are normalized

* Fix tests
This commit is contained in:
Eugen Rochko 2022-07-13 15:03:28 +02:00 committed by GitHub
parent 12ed2d793b
commit e7aa2be828
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 193 additions and 51 deletions

View file

@ -62,7 +62,7 @@ class Account < ApplicationRecord
)
USERNAME_RE = /[a-z0-9_]+([a-z0-9_\.-]+[a-z0-9_]+)?/i
MENTION_RE = /(?<=^|[^\/[:word:]])@((#{USERNAME_RE})(?:@[[:word:]\.\-]+[[:word:]]+)?)/i
MENTION_RE = /(?<=^|[^\/[:word:]])@((#{USERNAME_RE})(?:@[[:alnum:]\.\-]+[[:alnum:]]+)?)/i
URL_PREFIX_RE = /\Ahttp(s?):\/\/[^\/]+/
include Attachmentable

View file

@ -3,14 +3,14 @@
#
# Table name: custom_filters
#
# id :bigint not null, primary key
# account_id :bigint
# id :bigint(8) not null, primary key
# account_id :bigint(8)
# expires_at :datetime
# phrase :text default(""), not null
# context :string default([]), not null, is an Array
# created_at :datetime not null
# updated_at :datetime not null
# action :integer default(0), not null
# action :integer default("warn"), not null
#
class CustomFilter < ApplicationRecord

View file

@ -3,8 +3,8 @@
#
# Table name: custom_filter_keywords
#
# id :bigint not null, primary key
# custom_filter_id :bigint not null
# id :bigint(8) not null, primary key
# custom_filter_id :bigint(8) not null
# keyword :text default(""), not null
# whole_word :boolean default(TRUE), not null
# created_at :datetime not null

View file

@ -13,17 +13,19 @@
#
class FeaturedTag < ApplicationRecord
belongs_to :account, inverse_of: :featured_tags, required: true
belongs_to :tag, inverse_of: :featured_tags, required: true
belongs_to :account, inverse_of: :featured_tags
belongs_to :tag, inverse_of: :featured_tags, optional: true # Set after validation
delegate :name, to: :tag, allow_nil: true
validates_associated :tag, on: :create
validates :name, presence: true, on: :create
validate :validate_tag_name, on: :create
validate :validate_featured_tags_limit, on: :create
def name=(str)
self.tag = Tag.find_or_create_by_names(str.strip)&.first
before_create :set_tag
before_create :reset_data
attr_writer :name
# Returns the associated tag's name once a tag id is present; otherwise
# returns the raw value held in @name.
def name
  return tag.name if tag_id.present?

  @name
end
def increment(timestamp)
@ -34,14 +36,23 @@ class FeaturedTag < ApplicationRecord
update(statuses_count: [0, statuses_count - 1].max, last_status_at: account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).where.not(id: deleted_status_id).select(:created_at).first&.created_at)
end
private
# Looks up (or creates) tags for the raw @name via Tag.find_or_create_by_names
# and assigns the first result, or nil when nothing is returned, to `tag`.
def set_tag
  resolved_tags = Tag.find_or_create_by_names(@name)
  self.tag = resolved_tags&.first
end
# Recomputes statuses_count and last_status_at from the account's public and
# unlisted statuses that carry this tag.
def reset_data
  tagged_statuses = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag)

  self.statuses_count = tagged_statuses.count
  self.last_status_at = tagged_statuses.select(:created_at).first&.created_at
end
private
# Adds a base error when the account already has 10 or more featured tags.
#
# Does nothing when no account is set: calling `featured_tags` on nil would
# raise NoMethodError and abort validation instead of producing an error.
# NOTE(review): a missing account is presumably reported by the belongs_to
# presence check — confirm.
def validate_featured_tags_limit
  return if account.nil?

  errors.add(:base, I18n.t('featured_tags.errors.limit')) if account.featured_tags.count >= 10
end
# Validates the raw @name supplied through the `name` writer.
#
# Adds :blank when the name is blank and :invalid when it does not fully match
# Tag::HASHTAG_NAME_RE. The value is coerced with to_s before matching so a nil
# (or non-String) name yields validation errors instead of raising
# NoMethodError on `match?`.
def validate_tag_name
  errors.add(:name, :blank) if @name.blank?
  errors.add(:name, :invalid) unless @name.to_s.match?(/\A(#{Tag::HASHTAG_NAME_RE})\z/i)
end
end

View file

@ -15,6 +15,7 @@
# last_status_at :datetime
# max_score :float
# max_score_at :datetime
# display_name :string
#
class Tag < ApplicationRecord
@ -24,11 +25,12 @@ class Tag < ApplicationRecord
has_many :featured_tags, dependent: :destroy, inverse_of: :tag
HASHTAG_SEPARATORS = "_\u00B7\u200c"
HASHTAG_NAME_RE = "([[:word:]_][[:word:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:word:]#{HASHTAG_SEPARATORS}]*[[:word:]_])|([[:word:]_]*[[:alpha:]][[:word:]_]*)"
HASHTAG_NAME_RE = "([[:alnum:]_][[:alnum:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:alnum:]#{HASHTAG_SEPARATORS}]*[[:alnum:]_])|([[:alnum:]_]*[[:alpha:]][[:alnum:]_]*)"
HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_RE})/i
validates :name, presence: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i }
validate :validate_name_change, if: -> { !new_record? && name_changed? }
validate :validate_display_name_change, if: -> { !new_record? && display_name_changed? }
scope :reviewed, -> { where.not(reviewed_at: nil) }
scope :unreviewed, -> { where(reviewed_at: nil) }
@ -46,6 +48,10 @@ class Tag < ApplicationRecord
name
end
# Returns the stored 'display_name' attribute, falling back to `name` when the
# attribute is nil or false.
def display_name
  stored = attributes['display_name']
  stored || name
end
# Reads the 'usable' flag through boolean_with_default, passing true as the
# default argument.
# NOTE(review): boolean_with_default is defined outside this view; presumably
# it returns the default when the stored value is unset — confirm.
def usable
boolean_with_default('usable', true)
end
@ -90,8 +96,10 @@ class Tag < ApplicationRecord
class << self
def find_or_create_by_names(name_or_names)
Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name|
tag = matching_name(normalized_name).first || create(name: normalized_name)
names = Array(name_or_names).map { |str| [normalize(str), str] }.uniq(&:first)
names.map do |(normalized_name, display_name)|
tag = matching_name(normalized_name).first || create(name: normalized_name, display_name: display_name)
yield tag if block_given?
@ -129,7 +137,7 @@ class Tag < ApplicationRecord
end
# Normalizes a raw hashtag string; the value returned is the result of
# HashtagNormalizer#normalize.
# NOTE(review): the gsub result on the first body line is discarded (gsub does
# not mutate str). In this diff rendering it is presumably the removed
# pre-change line — confirm against the actual file.
def normalize(str)
str.gsub(/\A#/, '')
HashtagNormalizer.new.normalize(str)
end
end
@ -138,4 +146,8 @@ class Tag < ApplicationRecord
# Adds an error on :name unless the new name compares equal to the previous
# one under casecmp, i.e. the change must not go beyond what casecmp ignores.
def validate_name_change
  return if name_was.mb_chars.casecmp(name.mb_chars).zero?

  errors.add(:name, I18n.t('tags.does_not_match_previous_name'))
end
# Adds an error on :display_name unless the display name, once passed through
# HashtagNormalizer, compares equal to `name` under casecmp.
def validate_display_name_change
  normalized_display = HashtagNormalizer.new.normalize(display_name)
  return if normalized_display.casecmp(name.mb_chars).zero?

  errors.add(:display_name, I18n.t('tags.does_not_match_previous_name'))
end
end