0
0
Fork 0

Use charlock_holmes instead of nkf at FetchLinkCardService (#4080)

* Specs for language detection

* Use CharlockHolmes instead of NKF

* Correct mistakes

* Correct style

* Set hint_enc instead of falling back and strip_tags

* Improve specs

* Add dependencies
This commit is contained in:
nullkal 2017-07-09 05:44:31 +09:00 committed by Eugen Rochko
parent 794781d121
commit 007ab330e6
11 changed files with 78 additions and 4 deletions

View file

@@ -1,5 +1,4 @@
# frozen_string_literal: true
require 'nkf'
class FetchLinkCardService < BaseService
include HttpHelper
@@ -86,7 +85,12 @@ class FetchLinkCardService < BaseService
return if response.code != 200 || response.mime_type != 'text/html'
html = response.to_s
page = Nokogiri::HTML(html, nil, NKF.guess(html).to_s)
detector = CharlockHolmes::EncodingDetector.new
detector.strip_tags = true
guess = detector.detect(html, response.charset)
page = Nokogiri::HTML(html, nil, guess&.fetch(:encoding))
card.type = :link
card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content