// iceshrimp/src/mfm/fromHtml.ts — TypeScript, 82 lines, 1.9 KiB

import { parseFragment, DefaultTreeDocumentFragment } from 'parse5';
import { urlRegex } from './prelude';
// Revision timestamp (from the repository history view): 2019-01-30 16:56:27 +09:00
/**
 * Converts an HTML fragment into the plain-text markup produced by this
 * `mfm` module.
 *
 * Conversion rules, in the order they are applied to each node:
 * - text nodes are copied verbatim;
 * - `<br>` becomes a single newline;
 * - `<p>` emits a blank line and then its converted children;
 * - `<a>` becomes a hashtag, a bare/angle-bracketed URL, a mention, or a
 *   `[text](href)` link (see the inline comments below);
 * - any other element is transparent: only its children are converted.
 *
 * @param html HTML fragment to convert.
 * @returns The converted text with leading/trailing whitespace trimmed.
 */
export function fromHtml(html: string): string {
	/** Attribute as read from a parsed element: a name/value pair. */
	interface HtmlAttr {
		name: string;
		value: string;
	}

	/** Minimal structural view of the parsed nodes this function reads. */
	interface HtmlNode {
		nodeName: string;
		value?: string;
		attrs?: HtmlAttr[];
		childNodes?: HtmlNode[];
	}

	const dom = parseFragment(html) as DefaultTreeDocumentFragment;

	// Output accumulator shared with the nested `analyze` walker.
	let text = '';

	for (const n of dom.childNodes) {
		analyze(n);
	}

	return text.trim();

	/** Concatenates the values of every text node below `node`. */
	function getText(node: HtmlNode): string {
		if (node.nodeName === '#text') return node.value ?? '';

		if (node.childNodes) {
			return node.childNodes.map(n => getText(n)).join('');
		}

		return '';
	}

	/** Looks up an attribute by name; `undefined` when absent. */
	function getAttr(node: HtmlNode, name: string): HtmlAttr | undefined {
		return (node.attrs ?? []).find(x => x.name === name);
	}

	/**
	 * Returns the hostname of `url`, or `null` when `url` cannot be parsed as
	 * an absolute URL (the `URL` constructor throws in that case) or has no
	 * hostname at all.
	 */
	function hostnameOf(url: string): string | null {
		try {
			return new URL(url).hostname || null;
		} catch {
			return null;
		}
	}

	/** Appends the converted form of each child of `node`, if it has any. */
	function analyzeChildren(node: HtmlNode): void {
		if (node.childNodes) {
			for (const n of node.childNodes) {
				analyze(n);
			}
		}
	}

	/** Appends the converted form of `node` to `text`. */
	function analyze(node: HtmlNode): void {
		switch (node.nodeName) {
			case '#text':
				text += node.value ?? '';
				break;

			case 'br':
				text += '\n';
				break;

			// Braces give the declarations below their own scope instead of
			// leaking them into the other `case` clauses.
			case 'a': {
				const txt = getText(node);
				const rel = getAttr(node, 'rel');
				const href = getAttr(node, 'href');
				const _class = getAttr(node, 'class');
				const isHashtag = rel?.value?.match('tag') || _class?.value?.match('hashtag');

				// Hashtag / no href / link text is the URL itself
				if (isHashtag || !href || href.value === txt) {
					text += isHashtag || txt.match(urlRegex) ? txt : `<${txt}>`;
				// Mention
				} else if (txt.startsWith('@') && !rel?.value?.match(/^me /)) {
					const part = txt.split('@');

					if (part.length === 2) {
						//#region The host part is omitted, so restore it from the href
						const host = hostnameOf(href.value);
						// An unparsable href cannot supply a host: keep the visible text
						// rather than throwing and losing the whole conversion.
						text += host ? `${txt}@${host}` : txt;
						//#endregion
					} else {
						// `@user@host` (three parts) is already complete. Any other
						// shape is kept verbatim so link text is never silently dropped.
						text += txt;
					}
				// Anything else
				} else {
					text += `[${txt}](${href.value})`;
				}
				break;
			}

			case 'p':
				text += '\n\n';
				analyzeChildren(node);
				break;

			default:
				analyzeChildren(node);
				break;
		}
	}
}