Add support for structured data and more OpenGraph tags to link cards (#16938)
Save preview cards under their canonical URL Increase max redirects to follow from 2 to 3
This commit is contained in:
parent
989c67d29d
commit
39cdf61ab7
5 changed files with 260 additions and 52 deletions
|
@ -13,12 +13,12 @@ class FetchLinkCardService < BaseService
|
|||
}iox
|
||||
|
||||
def call(status)
|
||||
@status = status
|
||||
@url = parse_urls
|
||||
@status = status
|
||||
@original_url = parse_urls
|
||||
|
||||
return if @url.nil? || @status.preview_cards.any?
|
||||
return if @original_url.nil? || @status.preview_cards.any?
|
||||
|
||||
@url = @url.to_s
|
||||
@url = @original_url.to_s
|
||||
|
||||
RedisLock.acquire(lock_options) do |lock|
|
||||
if lock.acquired?
|
||||
|
@ -31,7 +31,7 @@ class FetchLinkCardService < BaseService
|
|||
|
||||
attach_card if @card&.persisted?
|
||||
rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
|
||||
Rails.logger.debug "Error fetching link #{@url}: #{e}"
|
||||
Rails.logger.debug "Error fetching link #{@original_url}: #{e}"
|
||||
nil
|
||||
end
|
||||
|
||||
|
@ -47,6 +47,12 @@ class FetchLinkCardService < BaseService
|
|||
return @html if defined?(@html)
|
||||
|
||||
Request.new(:get, @url).add_headers('Accept' => 'text/html', 'User-Agent' => Mastodon::Version.user_agent + ' Bot').perform do |res|
|
||||
# We follow redirects, and ideally we want to save the preview card for
|
||||
# the destination URL and not any link shortener in-between, so here
|
||||
# we set the URL to the one of the last response in the redirect chain
|
||||
@url = res.request.uri.to_s.to_s
|
||||
@card = PreviewCard.find_or_initialize_by(url: @url) if @card.url != @url
|
||||
|
||||
if res.code == 200 && res.mime_type == 'text/html'
|
||||
@html_charset = res.charset
|
||||
@html = res.body_with_limit
|
||||
|
@ -63,12 +69,15 @@ class FetchLinkCardService < BaseService
|
|||
end
|
||||
|
||||
def parse_urls
|
||||
if @status.local?
|
||||
urls = @status.text.scan(URL_PATTERN).map { |array| Addressable::URI.parse(array[1]).normalize }
|
||||
else
|
||||
html = Nokogiri::HTML(@status.text)
|
||||
links = html.css('a')
|
||||
urls = links.filter_map { |a| Addressable::URI.parse(a['href']) unless skip_link?(a) }.filter_map(&:normalize)
|
||||
urls = begin
|
||||
if @status.local?
|
||||
@status.text.scan(URL_PATTERN).map { |array| Addressable::URI.parse(array[1]).normalize }
|
||||
else
|
||||
document = Nokogiri::HTML(@status.text)
|
||||
links = document.css('a')
|
||||
|
||||
links.filter_map { |a| Addressable::URI.parse(a['href']) unless skip_link?(a) }.filter_map(&:normalize)
|
||||
end
|
||||
end
|
||||
|
||||
urls.reject { |uri| bad_url?(uri) }.first
|
||||
|
@ -79,18 +88,16 @@ class FetchLinkCardService < BaseService
|
|||
uri.host.blank? || TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme)
|
||||
end
|
||||
|
||||
# rubocop:disable Naming/MethodParameterName
|
||||
def mention_link?(a)
|
||||
def mention_link?(anchor)
|
||||
@status.mentions.any? do |mention|
|
||||
a['href'] == ActivityPub::TagManager.instance.url_for(mention.account)
|
||||
anchor['href'] == ActivityPub::TagManager.instance.url_for(mention.account)
|
||||
end
|
||||
end
|
||||
|
||||
def skip_link?(a)
|
||||
def skip_link?(anchor)
|
||||
# Avoid links for hashtags and mentions (microformats)
|
||||
a['rel']&.include?('tag') || a['class']&.match?(/u-url|h-card/) || mention_link?(a)
|
||||
anchor['rel']&.include?('tag') || anchor['class']&.match?(/u-url|h-card/) || mention_link?(anchor)
|
||||
end
|
||||
# rubocop:enable Naming/MethodParameterName
|
||||
|
||||
def attempt_oembed
|
||||
service = FetchOEmbedService.new
|
||||
|
@ -139,42 +146,14 @@ class FetchLinkCardService < BaseService
|
|||
def attempt_opengraph
|
||||
return if html.nil?
|
||||
|
||||
detector = CharlockHolmes::EncodingDetector.new
|
||||
detector.strip_tags = true
|
||||
link_details_extractor = LinkDetailsExtractor.new(@url, @html, @html_charset)
|
||||
|
||||
guess = detector.detect(@html, @html_charset)
|
||||
encoding = guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil
|
||||
page = Nokogiri::HTML(@html, nil, encoding)
|
||||
player_url = meta_property(page, 'twitter:player')
|
||||
|
||||
if player_url && !bad_url?(Addressable::URI.parse(player_url))
|
||||
@card.type = :video
|
||||
@card.width = meta_property(page, 'twitter:player:width') || 0
|
||||
@card.height = meta_property(page, 'twitter:player:height') || 0
|
||||
@card.html = content_tag(:iframe, nil, src: player_url,
|
||||
width: @card.width,
|
||||
height: @card.height,
|
||||
allowtransparency: 'true',
|
||||
scrolling: 'no',
|
||||
frameborder: '0')
|
||||
else
|
||||
@card.type = :link
|
||||
end
|
||||
|
||||
@card.title = meta_property(page, 'og:title').presence || page.at_xpath('//title')&.content || ''
|
||||
@card.description = meta_property(page, 'og:description').presence || meta_property(page, 'description') || ''
|
||||
@card.image_remote_url = (Addressable::URI.parse(@url) + meta_property(page, 'og:image')).to_s if meta_property(page, 'og:image')
|
||||
|
||||
return if @card.title.blank? && @card.html.blank?
|
||||
|
||||
@card.save_with_optional_image!
|
||||
end
|
||||
|
||||
def meta_property(page, property)
|
||||
page.at_xpath("//meta[contains(concat(' ', normalize-space(@property), ' '), ' #{property} ')]")&.attribute('content')&.value || page.at_xpath("//meta[@name=\"#{property}\"]")&.attribute('content')&.value
|
||||
@card = PreviewCard.find_or_initialize_by(url: link_details_extractor.canonical_url) if link_details_extractor.canonical_url != @card.url
|
||||
@card.assign_attributes(link_details_extractor.to_preview_card_attributes)
|
||||
@card.save_with_optional_image! unless @card.title.blank? && @card.html.blank?
|
||||
end
|
||||
|
||||
def lock_options
|
||||
{ redis: Redis.current, key: "fetch:#{@url}", autorelease: 15.minutes.seconds }
|
||||
{ redis: Redis.current, key: "fetch:#{@original_url}", autorelease: 15.minutes.seconds }
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue