ファイルと画像認識処理の改善 (#5690)

* dimensions制限とリファクタ

* comment

* 不要な変更削除

* use fromFile など

* Add probe-image-size.d.ts

* えーCRLFで作るなよ…

* Update src/@types/probe-image-size.d.ts

Co-Authored-By: Acid Chicken (硫酸鶏) <root@acid-chicken.com>

* fix d.ts

* Update src/@types/probe-image-size.d.ts

Co-Authored-By: Acid Chicken (硫酸鶏) <root@acid-chicken.com>

* Update src/@types/probe-image-size.d.ts

Co-Authored-By: Acid Chicken (硫酸鶏) <root@acid-chicken.com>

* fix

Co-authored-by: Acid Chicken (硫酸鶏) <root@acid-chicken.com>
This commit is contained in:
MeiMei 2020-01-12 16:40:58 +09:00 committed by GitHub
parent d09d06e4cb
commit 9703ba5340
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 456 additions and 154 deletions

27
src/@types/probe-image-size.d.ts vendored Normal file
View file

@ -0,0 +1,27 @@
/**
 * Ambient type declarations for the `probe-image-size` package.
 *
 * Only the call shapes declared below are typed: probing a URL string or a
 * readable file stream, either promise-style or with a Node-style callback.
 */
declare module 'probe-image-size' {
	import { ReadStream } from 'fs';

	/**
	 * Options accepted by the probe call.
	 *
	 * Both fields are optional plain numbers. Typing them as the literals
	 * `1` and `30000` (presumably the library defaults — TODO confirm) would
	 * force every caller to pass exactly those values.
	 */
	type ProbeOptions = {
		/** Number of retries. */
		retries?: number;
		/** Timeout value (presumably milliseconds — TODO confirm against the library docs). */
		timeout?: number;
	};

	/** Result object produced by a successful probe. */
	type ProbeResult = {
		width: number;
		height: number;
		length?: number;
		type: string;
		mime: string;
		/** Unit in which `width` is expressed. */
		wUnits: 'in' | 'mm' | 'cm' | 'pt' | 'pc' | 'px' | 'em' | 'ex';
		/** Unit in which `height` is expressed. */
		hUnits: 'in' | 'mm' | 'cm' | 'pt' | 'pc' | 'px' | 'em' | 'ex';
		url?: string;
	};

	/** Promise-style call. */
	function probeImageSize(src: string | ReadStream, options?: ProbeOptions): Promise<ProbeResult>;
	/** Callback-style call without options. */
	function probeImageSize(src: string | ReadStream, callback: (err: Error | null, result?: ProbeResult) => void): void;
	/** Callback-style call with options. */
	function probeImageSize(src: string | ReadStream, options: ProbeOptions, callback: (err: Error | null, result?: ProbeResult) => void): void;

	// Hack: merging an empty namespace with the function, presumably so that
	// `import * as probeImageSize from 'probe-image-size'` type-checks against `export =`.
	namespace probeImageSize {}

	export = probeImageSize;
}

View file

@ -1,12 +0,0 @@
import * as fs from 'fs';
import isSvg from 'is-svg';
/**
 * Synchronously decide whether the file at `path` is an SVG document.
 *
 * Files larger than 1 MiB are rejected without being read, and any error
 * while inspecting or reading the file is reported as "not SVG".
 */
export default function(path: string) {
	const maxBytes = 1 * 1024 * 1024;

	try {
		const { size } = fs.statSync(path);
		return size > maxBytes ? false : isSvg(fs.readFileSync(path));
	} catch {
		return false;
	}
}

View file

@ -1,31 +0,0 @@
import * as fs from 'fs';
import checkSvg from '../misc/check-svg';
const FileType = require('file-type');
/**
 * Detect the MIME type and extension of a file from its first chunk.
 *
 * Resolves with a `[mime, ext]` tuple. When the type cannot be identified
 * (or the stream ends without the first chunk having settled the promise)
 * the result is `['application/octet-stream', null]`.
 * Rejects when the stream errors or when type detection itself fails.
 */
export async function detectMine(path: string) {
	return new Promise<[string, string | null]>((res, rej) => {
		const readable = fs.createReadStream(path);
		readable
			.on('error', rej)
			.once('data', async (buffer: Buffer) => {
				// Only the first chunk is inspected, so stop reading right away
				readable.destroy();

				try {
					const type = await FileType.fromBuffer(buffer);

					if (type) {
						// Generic XML may actually be SVG
						if (type.mime === 'application/xml' && checkSvg(path)) {
							res(['image/svg+xml', 'svg']);
						} else {
							res([type.mime, type.ext]);
						}
					} else if (checkSvg(path)) {
						res(['image/svg+xml', 'svg']);
					} else {
						// Fall back to application/octet-stream when the type cannot be identified
						res(['application/octet-stream', null]);
					}
				} catch (e) {
					// This listener is async: without this catch a detection failure
					// would become an unhandled rejection and the promise would never settle
					rej(e);
				}
			})
			.on('end', () => {
				// maybe 0 bytes
				res(['application/octet-stream', null]);
			});
	});
}

View file

@ -1,14 +1,14 @@
import { createTemp } from './create-temp';
import { downloadUrl } from './donwload-url';
import { detectMine } from './detect-mine';
import { detectType } from './get-file-info';
export async function detectUrlMine(url: string) {
export async function detectUrlMime(url: string) {
const [path, cleanup] = await createTemp();
try {
await downloadUrl(url, path);
const [type] = await detectMine(path);
return type;
const { mime } = await detectType(path);
return mime;
} finally {
cleanup();
}

201
src/misc/get-file-info.ts Normal file
View file

@ -0,0 +1,201 @@
import * as fs from 'fs';
import * as crypto from 'crypto';
import * as fileType from 'file-type';
import isSvg from 'is-svg';
import * as probeImageSize from 'probe-image-size';
import * as sharp from 'sharp';
/**
 * Result of inspecting a file with getFileInfo.
 */
export type FileInfo = {
// File size as reported by getFileSize
size: number;
// Hex-encoded MD5 digest of the file content
md5: string;
// Detected MIME type and extension (ext is null when unknown)
type: {
mime: string;
ext: string | null;
};
// Image dimensions; getFileInfo only sets these when the probe reports pixel units
width?: number;
height?: number;
// Average colour as [r, g, b]; only computed for a subset of image types
avgColor?: number[];
// Non-fatal problems encountered while collecting the information
warnings: string[];
};
// Fallback type used when the file type is unknown or the file is not usable as an image
const TYPE_OCTET_STREAM = {
mime: 'application/octet-stream',
ext: null
};
// Type returned when the content is recognised as SVG
const TYPE_SVG = {
mime: 'image/svg+xml',
ext: 'svg'
};
/**
 * Gather size, MD5 hash, detected type and, for supported image types,
 * the pixel dimensions and average colour of the file at `path`.
 *
 * Failures while probing dimensions or computing the average colour do not
 * reject; they are recorded in the returned `warnings` list instead.
 * An image whose dimensions cannot be detected, or exceed 16383 on either
 * side, has its type replaced by application/octet-stream.
 */
export async function getFileInfo(path: string): Promise<FileInfo> {
	const warnings: string[] = [];

	const size = await getFileSize(path);
	const md5 = await calcHash(path);
	let type = await detectType(path);

	// --- image dimensions ---
	const probeableMimes = ['image/jpeg', 'image/gif', 'image/png', 'image/apng', 'image/webp', 'image/bmp', 'image/tiff', 'image/svg+xml', 'image/vnd.adobe.photoshop'];
	let width: number | undefined;
	let height: number | undefined;

	if (probeableMimes.includes(type.mime)) {
		const probed = await detectImageSize(path).catch(e => {
			warnings.push(`detectImageSize failed: ${e}`);
			return undefined;
		});

		if (!probed) {
			// Images that cannot be measured are treated as octet-stream
			warnings.push(`cannot detect image dimensions`);
			type = TYPE_OCTET_STREAM;
		} else if (probed.wUnits !== 'px') {
			warnings.push(`unsupported unit type: ${probed.wUnits}`);
		} else {
			width = probed.width;
			height = probed.height;

			// Images over the limit are treated as octet-stream
			const tooLarge = probed.width > 16383 || probed.height > 16383;
			if (tooLarge) {
				warnings.push(`image dimensions exceeds limits`);
				type = TYPE_OCTET_STREAM;
			}
		}
	}

	// --- average color ---
	const colorableMimes = ['image/jpeg', 'image/gif', 'image/png', 'image/apng', 'image/webp', 'image/svg+xml'];
	let avgColor: number[] | undefined;

	if (colorableMimes.includes(type.mime)) {
		avgColor = await calcAvgColor(path).catch(e => {
			warnings.push(`calcAvgColor failed: ${e}`);
			return undefined;
		});
	}

	return { size, md5, type, width, height, avgColor, warnings };
}
/**
 * Detect the MIME type and extension of the file at `path`.
 *
 * Empty files and files of unknown type yield application/octet-stream.
 * Content detected as generic XML, or not detected at all, is additionally
 * checked for SVG.
 */
export async function detectType(path: string) {
	// A zero-byte file has nothing to inspect
	if ((await getFileSize(path)) === 0) {
		return TYPE_OCTET_STREAM;
	}

	const detected = await fileType.fromFile(path);

	if (!detected) {
		// Unknown type: it may still be SVG, otherwise fall back to octet-stream
		return (await checkSvg(path)) ? TYPE_SVG : TYPE_OCTET_STREAM;
	}

	// XML may actually be SVG
	if (detected.mime === 'application/xml' && await checkSvg(path)) {
		return TYPE_SVG;
	}

	const { mime, ext } = detected;
	return { mime, ext };
}
/**
 * Check whether the file at `path` is SVG.
 *
 * Files larger than 1 MiB are rejected without being read. Any error while
 * inspecting or reading the file resolves to `false` rather than rejecting.
 */
export async function checkSvg(path: string) {
	try {
		const size = await getFileSize(path);
		if (size > 1 * 1024 * 1024) return false;
		// Read asynchronously so the event loop is not blocked while the file is loaded
		return isSvg(await fs.promises.readFile(path));
	} catch {
		return false;
	}
}
/**
 * Resolve with the size, in bytes, of the file at `path`.
 * Rejects with the underlying stat error when the file cannot be inspected.
 */
export async function getFileSize(path: string): Promise<number> {
	const stats = await fs.promises.stat(path);
	return stats.size;
}
/**
 * Compute the MD5 digest of the file at `path`, hex-encoded.
 * Rejects when the file cannot be read.
 */
async function calcHash(path: string): Promise<string> {
	const digest = crypto.createHash('md5');
	const source = fs.createReadStream(path);

	return new Promise<string>((resolve, reject) => {
		source.on('error', reject);
		// Feed every chunk into the hash as it arrives
		source.on('data', piece => {
			digest.update(piece);
		});
		source.on('end', () => {
			resolve(digest.digest('hex'));
		});
	});
}
/**
 * Detect the dimensions of the image at `path`.
 *
 * Resolves with the width/height and their units as reported by
 * probe-image-size. Rejects when probing fails.
 */
async function detectImageSize(path: string): Promise<{
	width: number;
	height: number;
	wUnits: string;
	hUnits: string;
}> {
	const readable = fs.createReadStream(path);
	try {
		return await probeImageSize(readable);
	} finally {
		// Always release the stream, including when probing rejects,
		// so a failed probe does not leave the file open
		readable.destroy();
	}
}
/**
 * Compute the average colour of the image at `path` as `[r, g, b]`.
 *
 * Uses the per-channel means reported by sharp's stats, rounded to integers.
 * Images that are not opaque are reported as white, `[255, 255, 255]`.
 */
async function calcAvgColor(path: string): Promise<number[]> {
	const stats = await (sharp(path) as any).stats();

	if (!stats.isOpaque) {
		return [255, 255, 255];
	}

	// Channels 0, 1 and 2 are read as red, green and blue respectively
	return [0, 1, 2].map(index => Math.round(stats.channels[index].mean));
}

View file

@ -1,6 +1,6 @@
import $ from 'cafy';
import define from '../../../define';
import { detectUrlMine } from '../../../../../misc/detect-url-mine';
import { detectUrlMime } from '../../../../../misc/detect-url-mime';
import { Emojis } from '../../../../../models';
import { genId } from '../../../../../misc/gen-id';
import { getConnection } from 'typeorm';
@ -46,7 +46,7 @@ export const meta = {
};
export default define(meta, async (ps, me) => {
const type = await detectUrlMine(ps.url);
const type = await detectUrlMime(ps.url);
const exists = await Emojis.findOne({
name: ps.name,

View file

@ -1,6 +1,6 @@
import $ from 'cafy';
import define from '../../../define';
import { detectUrlMine } from '../../../../../misc/detect-url-mine';
import { detectUrlMime } from '../../../../../misc/detect-url-mime';
import { ID } from '../../../../../misc/cafy-id';
import { Emojis } from '../../../../../models';
import { getConnection } from 'typeorm';
@ -52,7 +52,7 @@ export default define(meta, async (ps) => {
if (emoji == null) throw new ApiError(meta.errors.noSuchEmoji);
const type = await detectUrlMine(ps.url);
const type = await detectUrlMime(ps.url);
await Emojis.update(emoji.id, {
updatedAt: new Date(),

View file

@ -8,7 +8,7 @@ import { contentDisposition } from '../../misc/content-disposition';
import { DriveFiles } from '../../models';
import { InternalStorage } from '../../services/drive/internal-storage';
import { downloadUrl } from '../../misc/donwload-url';
import { detectMine } from '../../misc/detect-mine';
import { detectType } from '../../misc/get-file-info';
import { convertToJpeg, convertToPng } from '../../services/drive/image-processor';
import { GenerateVideoThumbnail } from '../../services/drive/generate-video-thumbnail';
@ -52,15 +52,15 @@ export default async function(ctx: Koa.Context) {
try {
await downloadUrl(file.uri, path);
const [type, ext] = await detectMine(path);
const { mime, ext } = await detectType(path);
const convertFile = async () => {
if (isThumbnail) {
if (['image/jpeg', 'image/webp'].includes(type)) {
if (['image/jpeg', 'image/webp'].includes(mime)) {
return await convertToJpeg(path, 498, 280);
} else if (['image/png'].includes(type)) {
} else if (['image/png'].includes(mime)) {
return await convertToPng(path, 498, 280);
} else if (type.startsWith('video/')) {
} else if (mime.startsWith('video/')) {
return await GenerateVideoThumbnail(path);
}
}
@ -68,7 +68,7 @@ export default async function(ctx: Koa.Context) {
return {
data: fs.readFileSync(path),
ext,
type,
type: mime,
};
};

View file

@ -4,7 +4,7 @@ import { serverLogger } from '..';
import { IImage, convertToPng, convertToJpeg } from '../../services/drive/image-processor';
import { createTemp } from '../../misc/create-temp';
import { downloadUrl } from '../../misc/donwload-url';
import { detectMine } from '../../misc/detect-mine';
import { detectType } from '../../misc/get-file-info';
export async function proxyMedia(ctx: Koa.Context) {
const url = 'url' in ctx.query ? ctx.query.url : 'https://' + ctx.params.url;
@ -15,21 +15,21 @@ export async function proxyMedia(ctx: Koa.Context) {
try {
await downloadUrl(url, path);
const [type, ext] = await detectMine(path);
const { mime, ext } = await detectType(path);
if (!type.startsWith('image/')) throw 403;
if (!mime.startsWith('image/')) throw 403;
let image: IImage;
if ('static' in ctx.query && ['image/png', 'image/gif', 'image/apng', 'image/vnd.mozilla.apng'].includes(type)) {
if ('static' in ctx.query && ['image/png', 'image/gif', 'image/apng', 'image/vnd.mozilla.apng'].includes(mime)) {
image = await convertToPng(path, 498, 280);
} else if ('preview' in ctx.query && ['image/jpeg', 'image/png', 'image/gif', 'image/apng', 'image/vnd.mozilla.apng'].includes(type)) {
} else if ('preview' in ctx.query && ['image/jpeg', 'image/png', 'image/gif', 'image/apng', 'image/vnd.mozilla.apng'].includes(mime)) {
image = await convertToJpeg(path, 200, 200);
} else {
image = {
data: fs.readFileSync(path),
ext,
type,
type: mime,
};
}

View file

@ -1,9 +1,6 @@
import { Buffer } from 'buffer';
import * as fs from 'fs';
import * as crypto from 'crypto';
import { v4 as uuid } from 'uuid';
import * as sharp from 'sharp';
import { publishMainStream, publishDriveStream } from '../stream';
import { deleteFile } from './delete-file';
@ -12,7 +9,7 @@ import { GenerateVideoThumbnail } from './generate-video-thumbnail';
import { driveLogger } from './logger';
import { IImage, convertToJpeg, convertToWebp, convertToPng } from './image-processor';
import { contentDisposition } from '../../misc/content-disposition';
import { detectMine } from '../../misc/detect-mine';
import { getFileInfo } from '../../misc/get-file-info';
import { DriveFiles, DriveFolders, Users, Instances, UserProfiles } from '../../models';
import { InternalStorage } from './internal-storage';
import { DriveFile } from '../../models/entities/drive-file';
@ -271,41 +268,16 @@ export default async function(
uri: string | null = null,
sensitive: boolean | null = null
): Promise<DriveFile> {
// Calc md5 hash
const calcHash = new Promise<string>((res, rej) => {
const readable = fs.createReadStream(path);
const hash = crypto.createHash('md5');
const chunks: Buffer[] = [];
readable
.on('error', rej)
.pipe(hash)
.on('error', rej)
.on('data', chunk => chunks.push(chunk))
.on('end', () => {
const buffer = Buffer.concat(chunks);
res(buffer.toString('hex'));
});
});
// Get file size
const getFileSize = new Promise<number>((res, rej) => {
fs.stat(path, (err, stats) => {
if (err) return rej(err);
res(stats.size);
});
});
const [hash, [mime, ext], size] = await Promise.all([calcHash, detectMine(path), getFileSize]);
logger.info(`hash: ${hash}, mime: ${mime}, ext: ${ext}, size: ${size}`);
const info = await getFileInfo(path);
logger.info(`${JSON.stringify(info)}`);
// detect name
const detectedName = name || (ext ? `untitled.${ext}` : 'untitled');
const detectedName = name || (info.type.ext ? `untitled.${info.type.ext}` : 'untitled');
if (!force) {
// Check if there is a file with the same hash
const much = await DriveFiles.findOne({
md5: hash,
md5: info.md5,
userId: user.id,
});
@ -325,7 +297,7 @@ export default async function(
logger.debug(`drive usage is ${usage} (max: ${driveCapacity})`);
// If usage limit exceeded
if (usage + size > driveCapacity) {
if (usage + info.size > driveCapacity) {
if (Users.isLocalUser(user)) {
throw new Error('no-free-space');
} else {
@ -351,57 +323,24 @@ export default async function(
return driveFolder;
};
const properties: {[key: string]: any} = {};
const properties: {
width?: number;
height?: number;
avgColor?: string;
} = {};
let propPromises: Promise<void>[] = [];
if (info.width) {
properties['width'] = info.width;
properties['height'] = info.height;
}
const isImage = ['image/jpeg', 'image/gif', 'image/png', 'image/apng', 'image/vnd.mozilla.apng', 'image/webp', 'image/svg+xml'].includes(mime);
if (isImage) {
const img = sharp(path);
// Calc width and height
const calcWh = async () => {
logger.debug('calculating image width and height...');
// Calculate width and height
const meta = await img.metadata();
logger.debug(`image width and height is calculated: ${meta.width}, ${meta.height}`);
properties['width'] = meta.width;
properties['height'] = meta.height;
};
// Calc average color
const calcAvg = async () => {
logger.debug('calculating average color...');
try {
const info = await img.stats();
if (info.isOpaque) {
const r = Math.round(info.channels[0].mean);
const g = Math.round(info.channels[1].mean);
const b = Math.round(info.channels[2].mean);
logger.debug(`average color is calculated: ${r}, ${g}, ${b}`);
properties['avgColor'] = `rgb(${r},${g},${b})`;
} else {
logger.debug(`this image is not opaque so average color is 255, 255, 255`);
properties['avgColor'] = `rgb(255,255,255)`;
}
} catch (e) { }
};
propPromises = [calcWh(), calcAvg()];
if (info.avgColor) {
	// Serialise [r, g, b] as a CSS colour string such as "rgb(12,34,56)";
	// the closing parenthesis is required for the value to be valid
	properties['avgColor'] = `rgb(${info.avgColor.join(',')})`;
}
const profile = await UserProfiles.findOne(user.id);
const [folder] = await Promise.all([fetchFolder(), Promise.all(propPromises)]);
const folder = await fetchFolder();
let file = new DriveFile();
file.id = genId();
@ -436,9 +375,9 @@ export default async function(
if (isLink) {
try {
file.size = 0;
file.md5 = hash;
file.md5 = info.md5;
file.name = detectedName;
file.type = mime;
file.type = info.type.mime;
file.storedInternal = false;
file = await DriveFiles.save(file);
@ -457,7 +396,7 @@ export default async function(
}
}
} else {
file = await (save(file, path, detectedName, mime, hash, size));
file = await (save(file, path, detectedName, info.type.mime, info.md5, info.size));
}
logger.succ(`drive file has been created ${file.id}`);