Ignore low-confidence CharlockHolmes guesses when parsing link cards (#9510)
* Add failing test for windows-1251 link cards
* Ignore low-confidence CharlockHolmes guesses (fixes #9466)
* Fix NoMethodError when CharlockHolmes cannot detect the charset
This commit is contained in:
parent
4ede51743e
commit
e709b8da0d
3 changed files with 30 additions and 1 deletion
|
@@ -137,7 +137,8 @@ class FetchLinkCardService < BaseService
|
|||
detector.strip_tags = true
|
||||
|
||||
guess = detector.detect(@html, @html_charset)
|
||||
page = Nokogiri::HTML(@html, nil, guess&.fetch(:encoding, nil))
|
||||
encoding = guess&.fetch(:confidence, 0).to_i > 60 ? guess&.fetch(:encoding, nil) : nil
|
||||
page = Nokogiri::HTML(@html, nil, encoding)
|
||||
player_url = meta_property(page, 'twitter:player')
|
||||
|
||||
if player_url && !bad_url?(Addressable::URI.parse(player_url))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue