0
0
Fork 0

feat: remove js2py dependency (#307)

* feat: remove js2py dependency

* style: apply code style isort

* feat: remove interpreter

* style: fix type hints

* test: fix image url fixture

* feat: add refresh func and fix some problem

* style: fix type hints

* style: common js is native, so we only refresh gg js

* feat: remove common js request func

* chore(deps): remove bs4
This commit is contained in:
Ryu Juheon 2022-10-28 11:39:27 +09:00 committed by GitHub
parent 080e74b5a8
commit fd200eb2cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 189 additions and 289 deletions

View file

@ -25,8 +25,12 @@ SOFTWARE.
def main() -> None: # pragma: no cover
# I've done all of my testing on this.
from functools import partial
from sys import argv
from sanic import Sanic
from sanic.worker.loader import AppLoader
from heliotrope.argparser import parse_args
from heliotrope.config import HeliotropeConfig
from heliotrope.server import create_app
@ -34,15 +38,18 @@ def main() -> None: # pragma: no cover
heliotrope_config = HeliotropeConfig()
args = parse_args(argv[1:])
heliotrope_config.update_with_args(args)
create_app(heliotrope_config).run(
loader = AppLoader(factory=partial(create_app, heliotrope_config))
app = loader.load()
app.prepare(
heliotrope_config.HOST,
heliotrope_config.PORT,
debug=heliotrope_config.DEBUG,
workers=heliotrope_config.WORKERS,
)
Sanic.serve(app, app_loader=loader)
if __name__ == "__main__": # pragma: no cover

View file

@ -109,7 +109,7 @@ def parse_args(argv: list[str]) -> Namespace:
"--refresh-delay",
type=int,
default=86400,
help="The delay between refresh commonjs (default: 86400)",
help="The delay between refresh ggjs (default: 86400)",
)
config.add_argument(

View file

@ -57,7 +57,7 @@ class HeliotropeConfig(Config):
"INFO_DB_URL": "",
"INDEX_FILE": ["index-english.nozomi"],
"MIRRORING_DELAY": 3600,
"REFRESH_COMMON_JS_DELAY": 86400,
"REFRESH_GG_JS_DELAY": 86400,
"SUPERVISOR_DELAY": 30,
"USE_ATLAS_SEARCH": False,
# Sanic config
@ -99,7 +99,7 @@ class HeliotropeConfig(Config):
GALLERYINFO_DB_URL: str
INFO_DB_URL: str
MIRRORING_DELAY: float
REFRESH_COMMON_JS_DELAY: float
REFRESH_GG_JS_DELAY: float
SUPERVISOR_DELAY: float
INDEX_FILE: list[str]
USE_ATLAS_SEARCH: bool

View file

@ -1,147 +0,0 @@
"""
MIT License
Copyright (c) 2021 SaidBySolo
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from asyncio.threads import to_thread
from io import StringIO
from typing import cast
from js2py.evaljs import EvalJs # type: ignore
from sanic.log import logger
from heliotrope.domain.info import Info
from heliotrope.request.hitomi import HitomiRequest
from heliotrope.types import HitomiFileJSON
# To get Hitomi.la image address, you can use common.js and gg.js.
# 히토미의 이미지 주소를 가져올려면 common.js와 gg.js를 사용해야 합니다.
class CommonJS:
    """Run Hitomi's ``common.js`` / ``gg.js`` helpers inside a js2py interpreter.

    The interpreter is loaded with, in order: the local ``function.js`` helpers,
    a trimmed copy of ``common.js`` (prefixed with the polyfill) and a filtered
    copy of ``gg.js``. The async methods call the resulting JavaScript functions
    through ``to_thread``.
    """

    def __init__(self, polyfill: str) -> None:
        self.interpreter = EvalJs()
        # JavaScript source prepended to the extracted common.js functions.
        self.polyfill = polyfill
        # NOTE(review): ``code`` is not read anywhere in this class; the fetched
        # sources are stored as ``common_js_code`` / ``gg_js_code`` instead.
        self.code = ""

    @classmethod
    async def setup(cls, request: HitomiRequest) -> "CommonJS":
        """Fetch both scripts through ``request`` and return a loaded instance."""
        logger.debug(f"Setting up {cls.__name__}")
        common_js_code = await request.get_common_js()
        gg_js_code = await request.get_gg_js()
        # Because it is executed only once for the first time, it is safe to block
        # NOTE(review): both paths below are relative to the current working
        # directory, not to this module.
        with open("./heliotrope/interpreter/polyfill.js") as f:
            # Polyfill is used because there are functions that js2py does not have.
            polyfill = f.read()

        instance = cls(polyfill)
        # See ./heliotrope/interpreter/function.js
        with open("./heliotrope/interpreter/function.js") as f:
            function = f.read()

        instance.interpreter.execute(function)
        instance.update_js_code(common_js_code, gg_js_code)
        return instance

    def get_using_functions(self, code: str) -> str:
        """Return the polyfill followed by the selected functions cut out of ``code``.

        A function is copied from the line starting with ``function <name>`` up to
        and including the next line starting with ``}``. The first line starting
        with ``var gg`` is copied as well.
        """
        # Extract only the functions you want to use.
        export_functions_name = [
            "subdomain_from_galleryid",
            "subdomain_from_url",
            "url_from_url",
            "full_path_from_hash",
            "real_full_path_from_hash",
            "url_from_hash",
            "url_from_url_from_hash",
            "rewrite_tn_paths",
        ]
        functions: list[str] = []
        lines = StringIO(code).readlines()
        # True while the lines of a matched function body are being copied.
        found = False
        # Ensures the "var gg" line is copied only once across all passes.
        gg_found = False
        functions.append(self.polyfill)
        # One full pass over the source per exported name, so the output follows
        # the order of the list above rather than the order in the source.
        for func_name in export_functions_name:
            for line in lines:
                if line.startswith("var gg") and not gg_found:
                    functions.append(line)
                    gg_found = True
                    continue

                if found:
                    functions.append(line)
                    # A closing brace at column 0 ends the function being copied.
                    if line.startswith("}"):
                        found = False
                    continue

                # NOTE(review): this is a prefix match, so a name that is a prefix
                # of another function's name would also match that function.
                if line.startswith(f"function {func_name}"):
                    functions.append(line)
                    found = True
                    continue

        return "".join(functions)

    def parse_gg_js(self, code: str) -> str:
        """Return ``code`` without the lines that contain ``if (!``."""
        lines = StringIO(code).readlines()
        return "".join([line for line in lines if "if (!" not in line])

    def update_js_code(self, common_js_code: str, gg_js_code: str) -> None:
        """Store the given sources and (re)load them into the interpreter."""
        self.common_js_code = common_js_code
        # Stored in filtered form, not as the raw text that was passed in.
        self.gg_js_code = self.parse_gg_js(gg_js_code)
        self.interpreter.execute(self.get_using_functions(common_js_code))
        self.interpreter.execute(self.gg_js_code)

    async def rewrite_tn_paths(self, html: str) -> str:
        """Call the JavaScript ``rewrite_tn_paths`` on ``html`` in a worker thread."""
        return cast(str, await to_thread(self.interpreter.rewrite_tn_paths, html))

    async def image_url_from_image(
        self, galleryid: int, image: HitomiFileJSON, no_webp: bool
    ) -> str:
        """Return the URL for one image via ``url_from_url_from_hash``.

        ``"webp"`` is passed as the third argument only when the image has a
        hash, has a webp variant and ``no_webp`` is false; otherwise ``None``.
        """
        webp = None
        if image["hash"] and image["haswebp"] and not no_webp:
            webp = "webp"
        return cast(
            str,
            await to_thread(
                self.interpreter.url_from_url_from_hash, galleryid, image, webp
            ),
        )

    async def convert_thumbnail(self, info: Info) -> dict[str, str]:
        """Return ``info.to_dict()`` with ``"thumbnail"`` replaced by its URL."""
        thumbnail_url = await self.get_thumbnail(info.id, info.thumbnail.to_dict())
        info_dict = cast(dict[str, str], info.to_dict())
        info_dict["thumbnail"] = thumbnail_url
        return info_dict

    async def get_thumbnail(self, galleryid: int, image: HitomiFileJSON) -> str:
        """Call the ``getThumbnail`` helper from function.js in a worker thread."""
        return cast(
            str,
            await to_thread(self.interpreter.getThumbnail, galleryid, image),
        )

    async def image_urls(
        self, galleryid: int, images: list[HitomiFileJSON], no_webp: bool
    ) -> dict[str, str]:
        """Call the ``imageUrls`` helper from function.js in a worker thread.

        NOTE(review): the JavaScript helper maps over ``images`` and builds one
        name/url object per image, so the annotated ``dict[str, str]`` may not
        describe the real result - confirm.
        """
        return cast(
            dict[str, str],
            await to_thread(self.interpreter.imageUrls, galleryid, images, no_webp),
        )

View file

@ -1,17 +0,0 @@
// If you continue to call the interpreter's function, it becomes very slow.
// Leave all the work to the interpreter.
// 계속해서 인터프리터의 함수를 호출을할경우 매우 느려집니다.
// 인터프리터에 모든작업을 맡깁니다.
// Builds one { name, url } entry per image inside a single interpreter call.
// 'webp' is requested only when the image has a hash, has a webp variant and
// the caller did not pass no_webp; otherwise null is passed on.
function imageUrls(galleryid, images, no_webp) {
    function toEntry(image) {
        var useWebp = image.hash && image.haswebp && !no_webp
        return {
            'name': image.name,
            'url': url_from_url_from_hash(galleryid, image, useWebp ? 'webp' : null, undefined, "a")
        }
    }
    return images.map(toEntry)
}
// See https://ltn.hitomi.la/gallery.js
// Returns the thumbnail URL for `image` by calling url_from_url_from_hash with
// the fixed arguments 'webpbigtn' (dir), 'webp' (ext) and 'tn' (base).
function getThumbnail(galleryid, image) {
    return url_from_url_from_hash(galleryid, image, 'webpbigtn', 'webp', 'tn')
}

View file

@ -1,59 +0,0 @@
// https://github.com/uxitten/polyfill/blob/master/string.polyfill.js
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/padStart
// Without this polyfill js2py fails with:
// js2py.internals.simplex.JsException: TypeError: 'undefined' is not a function (tried calling property 'padStart' of 'String')
// Installed only when the runtime does not already provide String.prototype.padStart.
if (!String.prototype.padStart) {
    String.prototype.padStart = function padStart(targetLength, padString) {
        targetLength = targetLength >> 0; //truncate if number or convert non-number to 0;
        // Pad with a single space when no pad string is given.
        padString = String((typeof padString !== 'undefined' ? padString : ' '));
        // Already longer than the target: return the string unchanged.
        if (this.length > targetLength) {
            return String(this);
        }
        else {
            // From here on targetLength is the number of pad characters needed.
            targetLength = targetLength - this.length;
            if (targetLength > padString.length) {
                padString += padString.repeat(targetLength / padString.length); //append to original to ensure we are longer than needed
            }
            // Cut the padding to the exact length and put it in front.
            return padString.slice(0, targetLength) + String(this);
        }
    };
}
// Without this polyfill js2py fails with:
// js2py.internals.simplex.JsException: TypeError: 'undefined' is not a function (tried calling property 'repeat' of 'String')
// Installed only when the runtime does not already provide String.prototype.repeat.
if (!String.prototype.repeat) {
    String.prototype.repeat = function (count) {
        'use strict';
        if (this == null)
            throw new TypeError('can\'t convert ' + this + ' to object');

        var str = '' + this;
        // To convert string to integer.
        count = +count;
        // Check NaN
        if (count != count)
            count = 0;

        if (count < 0)
            throw new RangeError('repeat count must be non-negative');

        if (count == Infinity)
            throw new RangeError('repeat count must be less than infinity');

        count = Math.floor(count);
        if (str.length == 0 || count == 0)
            return '';

        // Ensuring count is a 31-bit integer allows us to heavily optimize the
        // main part. But anyway, most current (August 2014) browsers can't handle
        // strings 1 << 28 chars or longer, so:
        if (str.length * count >= 1 << 28)
            throw new RangeError('repeat count must not overflow maximum string size');

        // Final length of the result.
        var maxCount = str.length * count;
        // Double the string floor(log2(count)) times ...
        count = Math.floor(Math.log(count) / Math.log(2));
        while (count) {
            str += str;
            count--;
        }
        // ... then append a prefix of it to reach the final length.
        str += str.substring(0, maxCount - str.length);
        return str;
    }
}

View file

108
heliotrope/js/common.py Normal file
View file

@ -0,0 +1,108 @@
from math import isnan
from re import compile, sub
from typing import cast
from heliotrope.domain import File, Info
from heliotrope.js.gg import GG
class Common:
    """Pure-Python versions of the image URL helpers formerly run from common.js.

    Every value that changes over time (the ``m`` lookup, the ``s`` transform
    and the ``b`` path prefix) is read from the wrapped ``GG`` object, so
    refreshing that object is enough to keep generated URLs current.
    """

    # Compiled once with the class instead of on every call.
    _HASH_TAIL = compile(r"\/[0-9a-f]{61}([0-9a-f]{2})([0-9a-f])")
    _HITOMI_HOST = compile(r"\/\/..?\.hitomi\.la\/")
    _HASH_SPLIT = compile(r"^.*(..)(.)$")
    _TN_URL = compile(r"//tn\.hitomi\.la/[^/]+/[0-9a-f]/[0-9a-f]{2}/[0-9a-f]{64}")

    def __init__(self, gg: GG) -> None:
        self.gg = gg

    @classmethod
    def setup(cls, code: str) -> "Common":
        """Build an instance from the text of ``gg.js``."""
        gg = GG(code)
        gg.parse()
        return cls(gg)

    def subdomain_from_url(self, url: str, base: str) -> str:
        """Return the subdomain that serves ``url``.

        The last three hex digits of the 64-digit hash in ``url`` are reordered
        (last digit first) and passed to ``gg.m``; the result selects the
        leading letter, which is followed by ``base`` (or ``"b"`` when ``base``
        is empty). ``"a"`` is returned when ``url`` contains no such hash.
        """
        match = self._HASH_TAIL.search(url)
        if not match:
            return "a"

        # int() either succeeds or raises, so no NaN check is needed here.
        g = int(match[2] + match[1], 16)
        return chr(97 + self.gg.m(g)) + (base or "b")

    def url_from_url(self, url: str, base: str) -> str:
        """Return ``url`` with its ``*.hitomi.la`` host replaced by the right subdomain."""
        host = "//" + self.subdomain_from_url(url, base) + ".hitomi.la/"
        # count=1: only the first host is rewritten, as a non-global JavaScript
        # replace would do. The callable keeps ``host`` from being interpreted
        # as a replacement template.
        return sub(self._HITOMI_HOST, lambda _: host, url, count=1)

    def full_path_from_hash(self, hash: str) -> str:
        """Return ``<gg.b><gg.s(hash)>/<hash>``."""
        return self.gg.b + self.gg.s(hash) + "/" + hash

    def real_full_path_from_hash(self, hash: str) -> str:
        """Return ``<last digit>/<two digits before it>/<hash>``.

        A hash shorter than three characters is returned unchanged.
        """
        # A callable replacement inserts ``hash`` literally instead of splicing
        # it into a replacement template.
        return sub(
            self._HASH_SPLIT,
            lambda m: m[2] + "/" + m[1] + "/" + hash,
            hash,
        )

    def url_from_hash(self, galleryid: str, image: File, dir: str, ext: str) -> str:
        """Return the ``a.hitomi.la`` URL of ``image`` before subdomain rewriting.

        ``ext`` falls back to ``dir`` and then to the extension of
        ``image.name``; ``dir`` falls back to ``"images"``. ``galleryid`` is
        accepted but not used.
        """
        ext = ext or dir or image.name.split(".").pop()
        dir = dir or "images"
        path = self.full_path_from_hash(image.hash)
        return f"https://a.hitomi.la/{dir}/{path}.{ext}"

    def url_from_url_from_hash(
        self, galleryid: str, image: File, dir: str, ext: str, base: str
    ) -> str:
        """Return the final URL of ``image`` with the subdomain applied.

        Thumbnails (``base == "tn"``) use ``real_full_path_from_hash``; every
        other base goes through ``url_from_hash``.
        """
        if base == "tn":
            path = self.real_full_path_from_hash(image.hash)
            return self.url_from_url(f"https://a.hitomi.la/{dir}/{path}.{ext}", base)

        return self.url_from_url(self.url_from_hash(galleryid, image, dir, ext), base)

    def rewrite_tn_paths(self, html: str) -> str:
        """Rewrite every ``//tn.hitomi.la/...`` thumbnail URL found in ``html``."""
        return sub(
            self._TN_URL,
            lambda url: self.url_from_url(url.group(0), "tn"),
            html,
        )

    def get_thumbnail(self, galleryid: str, image: File) -> str:
        """Return the ``webpbigtn`` thumbnail URL of ``image``."""
        return self.url_from_url_from_hash(galleryid, image, "webpbigtn", "webp", "tn")

    def convert_thumbnail(self, info: Info) -> dict[str, str]:
        """Return ``info.to_dict()`` with ``"thumbnail"`` replaced by its URL."""
        thumbnail_url = self.get_thumbnail(str(info.id), info.thumbnail)
        info_dict = cast(dict[str, str], info.to_dict())
        info_dict["thumbnail"] = thumbnail_url
        return info_dict

    def image_urls(
        self, galleryid: str, images: list[File], no_webp: bool
    ) -> list[str]:
        """Return one URL per image, in the order of ``images``."""
        return [
            self.image_url_from_image(galleryid, image, no_webp) for image in images
        ]

    def image_url_from_image(self, galleryid: str, image: File, no_webp: bool) -> str:
        """Return the URL of ``image``: avif when available, webp otherwise.

        NOTE(review): ``no_webp`` is accepted for compatibility but is not
        consulted here - confirm whether it should still select another format.
        """
        ext = "avif" if image.hasavif else "webp"
        return self.url_from_url_from_hash(galleryid, image, ext, "", "a")

38
heliotrope/js/gg.py Normal file
View file

@ -0,0 +1,38 @@
import re
from decimal import Decimal
class GG:
    """Values scraped from the text of Hitomi's ``gg.js``.

    The script is never executed. ``parse`` reads it line by line and keeps:

    * ``default_o``  - the number in ``var o = N;``
    * ``in_case_o``  - the number in ``o = N; break;``
    * ``case``       - every number listed as ``case N:``
    * ``b``          - the quoted string in ``b: '...'``
    """

    def __init__(self, code: str) -> None:
        self.code = code
        self.case: list[int] = []
        self.default_o = 0
        self.in_case_o = 0
        # Defined up front so that reading ``b`` before ``parse`` has run, or
        # after parsing a script without a ``b:`` line, gives "" instead of
        # raising AttributeError.
        self.b = ""

    def parse(self) -> None:
        """Read ``self.code`` and update the scraped values.

        Values whose line is missing keep their previous content, and case
        numbers are appended to ``self.case`` (``refresh`` clears it first).

        Raises:
            ValueError: if a matched line does not hold a valid integer.
        """
        for raw_line in self.code.split("\n"):
            # Tolerate CRLF line endings and indentation around each statement.
            line = raw_line.strip()
            if line.startswith("var o = ") and line.endswith(";"):
                self.default_o = int(line.removeprefix("var o = ").removesuffix(";"))
            if line.startswith("o = ") and line.endswith("; break;"):
                self.in_case_o = int(line.removeprefix("o = ").removesuffix("; break;"))
            if line.startswith("case "):
                self.case.append(int(line.removeprefix("case ").removesuffix(":")))
            if line.startswith("b: "):
                # A trailing comma after the closing quote is dropped as well.
                self.b = line.removeprefix("b: '").rstrip(",").removesuffix("'")

    def refresh(self, code: str) -> None:
        """Replace the stored script with ``code`` and parse it again."""
        self.code = code
        self.case.clear()
        self.parse()

    def m(self, g: int) -> int:
        """Return ``in_case_o`` when ``g`` is a listed case, else ``default_o``."""
        if g in self.case:
            return self.in_case_o
        return self.default_o

    def s(self, h: str) -> str:
        """Return the last three hex digits of ``h``, reordered, as a decimal string.

        The final digit is moved in front of the two digits before it and the
        result is read as a base-16 number.

        Raises:
            ValueError: if ``h`` has fewer than three characters or its tail is
                not hexadecimal.
        """
        m = re.search(r"(..)(.)$", h)
        if m is None:
            # Raised explicitly: an ``assert`` would be stripped under ``-O``.
            raise ValueError(f"hash is too short: {h!r}")
        return str(int(m[2] + m[1], 16))

View file

@ -68,11 +68,6 @@ class HitomiRequest:
hitomi_request = cls(request, index_file)
return hitomi_request
async def get_common_js(self) -> str:
request_url = self.ltn_url.with_path("common.js")
response = await self.request.get(request_url, "text")
return str(response.body)
async def get_gg_js(self) -> str:
request_url = self.ltn_url.with_path("gg.js")
response = await self.request.get(request_url, "text")

View file

@ -45,9 +45,7 @@ class HitomiImageView(HTTPMethodView):
if not galleryinfo:
raise NotFound
files = await request.app.ctx.common_js.image_urls(
id, list(map(lambda f: f.to_dict(), galleryinfo.files)), False
)
files = request.app.ctx.common.image_urls(str(id), galleryinfo.files, False)
return json(
{

View file

@ -21,8 +21,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from typing import Any
from sanic.blueprints import Blueprint
from sanic.exceptions import NotFound
from sanic.response import HTTPResponse, json
@ -51,7 +49,7 @@ class HitomiInfoView(HTTPMethodView):
return json(
{
"status": 200,
**await request.app.ctx.common_js.convert_thumbnail(info),
**request.app.ctx.common.convert_thumbnail(info),
}
)

View file

@ -52,8 +52,7 @@ class HitomiListView(HTTPMethodView):
{
"status": 200,
"list": [
await request.app.ctx.common_js.convert_thumbnail(info)
for info in info_list
request.app.ctx.common.convert_thumbnail(info) for info in info_list
],
"total": total,
}

View file

@ -48,9 +48,7 @@ class HitomiRandomView(HTTPMethodView):
query: list[str] = request.json.get("query") if request.json else []
info = await request.app.ctx.odm.get_random_info(query)
return json(
{"status": 200, **await request.app.ctx.common_js.convert_thumbnail(info)}
)
return json({"status": 200, **request.app.ctx.common.convert_thumbnail(info)})
hitomi_random.add_route(HitomiRandomView.as_view(), "/")

View file

@ -58,7 +58,7 @@ class HitomiSearchView(HTTPMethodView):
{
"status": 200,
"result": [
await request.app.ctx.common_js.convert_thumbnail(result)
request.app.ctx.common.convert_thumbnail(result)
for result in results
],
"count": count,

View file

@ -30,7 +30,7 @@ from sanic.request import Request
from heliotrope.config import HeliotropeConfig
from heliotrope.database.odm import ODM
from heliotrope.database.orm import ORM
from heliotrope.interpreter import CommonJS
from heliotrope.js.common import Common
from heliotrope.request.base import BaseRequest
from heliotrope.request.hitomi import HitomiRequest
@ -40,7 +40,7 @@ class HeliotropeContext(SimpleNamespace):
odm: ODM
request: BaseRequest
hitomi_request: HitomiRequest
common_js: CommonJS
common: Common
class Heliotrope(Sanic):

View file

@ -30,14 +30,14 @@ from heliotrope import __version__
from heliotrope.config import HeliotropeConfig
from heliotrope.database.odm import ODM
from heliotrope.database.orm import ORM
from heliotrope.interpreter import CommonJS
from heliotrope.js.common import Common
from heliotrope.request.base import BaseRequest
from heliotrope.request.hitomi import HitomiRequest
from heliotrope.rest import rest
from heliotrope.sanic import Heliotrope
from heliotrope.tasks.manager import SuperVisor
from heliotrope.tasks.mirroring import MirroringTask
from heliotrope.tasks.refresh import RefreshCommonJS
from heliotrope.tasks.refresh import RefreshggJS
from heliotrope.utils import is_the_first_process
@ -51,7 +51,9 @@ async def startup(heliotrope: Heliotrope, loop: AbstractEventLoop) -> None:
heliotrope.ctx.hitomi_request = await HitomiRequest.setup(
index_file=heliotrope.config.INDEX_FILE
)
heliotrope.ctx.common_js = await CommonJS.setup(heliotrope.ctx.hitomi_request)
heliotrope.ctx.common = Common.setup(
await heliotrope.ctx.hitomi_request.get_gg_js()
)
# Sentry
if heliotrope.config.PRODUCTION:
init(
@ -64,9 +66,7 @@ async def startup(heliotrope: Heliotrope, loop: AbstractEventLoop) -> None:
supervisor = SuperVisor(heliotrope)
if is_the_first_process:
supervisor.add_task(MirroringTask.setup, heliotrope.config.MIRRORING_DELAY)
supervisor.add_task(
RefreshCommonJS.setup, heliotrope.config.REFRESH_COMMON_JS_DELAY
)
supervisor.add_task(RefreshggJS.setup, heliotrope.config.REFRESH_GG_JS_DELAY)
heliotrope.add_task(supervisor.start(heliotrope.config.SUPERVISOR_DELAY))

View file

@ -28,8 +28,6 @@ from typing import NoReturn
from sanic.log import logger
from heliotrope.abc.task import AbstractTask
from heliotrope.interpreter import CommonJS
from heliotrope.request.hitomi import HitomiRequest
from heliotrope.sanic import Heliotrope
from heliotrope.types import SetupTask
@ -40,35 +38,21 @@ from heliotrope.types import SetupTask
# 이를 동적으로 해결합니다.
class RefreshCommonJS(AbstractTask):
def __init__(self, request: HitomiRequest, common_js: CommonJS) -> None:
self.request = request
self.common_js = common_js
class RefreshggJS(AbstractTask):
def __init__(self, app: Heliotrope) -> None:
self.app = app
async def start(self, delay: float) -> NoReturn:
while True:
if not self.common_js.common_js_code:
logger.warning("Common js code is empty")
logger.info("Update common js code and gg js")
self.common_js.update_js_code(
await self.request.get_common_js(), await self.request.get_gg_js()
)
await sleep(delay)
common_js_code = await self.request.get_common_js()
gg_js_code = await self.request.get_gg_js()
if (
self.common_js.common_js_code != common_js_code
or self.common_js.gg_js_code != gg_js_code
):
logger.warning("local common js code is different from remote")
logger.info("Update common js code")
self.common_js.update_js_code(common_js_code, gg_js_code)
await sleep(delay)
renew = await self.app.ctx.hitomi_request.get_gg_js()
if self.app.ctx.common.gg.code != renew:
logger.warning("local gg js code is different from remote")
logger.info("Update gg js code")
self.app.ctx.common.gg.refresh(renew)
@classmethod
def setup(cls, app: Heliotrope, delay: float) -> SetupTask:
logger.debug(f"Setting up {cls.__name__}")
instance = cls(app.ctx.hitomi_request, app.ctx.common_js)
instance = cls(app)
return create_task(instance.start(delay))

View file

@ -1,10 +1,8 @@
aiohttp==3.8.3
sanic[ext]==22.9.0
sentry-sdk==1.10.1
beautifulsoup4==4.11.1
SQLAlchemy[mypy,asyncio]==1.4.42
motor==3.1.1
dnspython==2.2.1
lxml==4.9.1
asyncpg==0.26.0
Js2Py==0.71

View file

@ -3,7 +3,7 @@
"index-english.nozomi"
],
"MIRRORING_DELAY": 3600,
"REFRESH_COMMON_JS_DELAY": 86400,
"REFRESH_GG_JS_DELAY": 86400,
"INFO_DB_URL": "mongodb://root:test@127.0.0.1",
"GALLERYINFO_DB_URL": "postgresql+asyncpg://postgres:test@localhost/test_heliotrope"
}

View file

@ -2,7 +2,6 @@ import json
from asyncio.events import AbstractEventLoop, get_running_loop, new_event_loop
from pytest import fixture, mark
from js2py.pyjs import undefined
from sanic_ext.extensions.http.extension import HTTPExtension
from sanic_ext.extensions.injection.extension import InjectionExtension
from sanic_ext.extensions.openapi.extension import OpenAPIExtension
@ -11,8 +10,8 @@ from sanic_testing import TestManager # type:ignore
from heliotrope.config import HeliotropeConfig
from heliotrope.database.orm.base import mapper_registry
from heliotrope.domain.galleryinfo import Galleryinfo
from heliotrope.domain.info import Info
from heliotrope.interpreter import CommonJS
from heliotrope.domain import Info, Galleryinfo
from heliotrope.js.common import Common
from heliotrope.request.hitomi import HitomiRequest
from heliotrope.sanic import Heliotrope
from heliotrope.server import create_app
@ -58,10 +57,10 @@ def reset_extensions():
@mark.asyncio
async def image_url():
hitomi_request = await HitomiRequest.setup()
common_js = await CommonJS.setup(hitomi_request)
yield common_js.interpreter.url_from_url_from_hash(
galleryinfo["id"], galleryinfo["files"][0], "webp", undefined, "a"
)
code = await hitomi_request.get_gg_js()
common_js = Common.setup(code)
gi = Galleryinfo.from_dict(galleryinfo)
yield common_js.url_from_url_from_hash(str(gi.id), gi.files[0], "webp", "", "a")
@fixture

View file

@ -37,4 +37,4 @@ def test_parse_args_with_config():
assert config.INFO_DB_URL == "mongodb://root:test@127.0.0.1"
assert config.INDEX_FILE == ["index-english.nozomi"]
assert config.MIRRORING_DELAY == 3600
assert config.REFRESH_COMMON_JS_DELAY == 86400
assert config.REFRESH_GG_JS_DELAY == 86400

View file

@ -1,4 +1,5 @@
from pytest import mark
from heliotrope.sanic import Heliotrope
url = "/api/proxy/"