From 97eddb5906640a79a1cba20823ce10e986f7f317 Mon Sep 17 00:00:00 2001 From: David Roetzel Date: Fri, 5 Jul 2024 15:28:52 +0200 Subject: [PATCH] Fix details extraction when no title exists. (#30933) --- app/lib/link_details_extractor.rb | 2 +- spec/fixtures/requests/page_without_title.txt | 17 +++++++++++++++++ spec/services/fetch_link_card_service_spec.rb | 9 +++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/requests/page_without_title.txt diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index 9436d20b5..a62ede2bb 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -156,7 +156,7 @@ class LinkDetailsExtractor end def title - html_entities_decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first).strip + html_entities_decode(structured_data&.headline || opengraph_tag('og:title') || document.xpath('//title').map(&:content).first)&.strip end def description diff --git a/spec/fixtures/requests/page_without_title.txt b/spec/fixtures/requests/page_without_title.txt new file mode 100644 index 000000000..0054aa3b7 --- /dev/null +++ b/spec/fixtures/requests/page_without_title.txt @@ -0,0 +1,17 @@ +HTTP/1.1 200 OK +server: nginx +date: Thu, 13 Jun 2024 14:33:13 GMT +content-type: text/html; charset=utf-8 +content-length: 171 +accept-ranges: bytes + + + + + + + +

I am not a valid page

+

Thankfully, browsers do not care

+ + diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb index 547737b61..b2cd99cea 100644 --- a/spec/services/fetch_link_card_service_spec.rb +++ b/spec/services/fetch_link_card_service_spec.rb @@ -30,6 +30,7 @@ RSpec.describe FetchLinkCardService do stub_request(:get, 'http://example.com/latin1_posing_as_utf8_broken').to_return(request_fixture('latin1_posing_as_utf8_broken.txt')) stub_request(:get, 'http://example.com/latin1_posing_as_utf8_recoverable').to_return(request_fixture('latin1_posing_as_utf8_recoverable.txt')) stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt')) + stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt')) Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache @@ -112,6 +113,14 @@ RSpec.describe FetchLinkCardService do end end + context 'with a page that has no title' do + let(:status) { Fabricate(:status, text: 'http://example.com/page_without_title') } + + it 'does not create a preview card' do + expect(status.preview_card).to be_nil + end + end + context 'with a 404 URL' do let(:status) { Fabricate(:status, text: 'http://example.com/not-found') }