Compress and combine emoji data (#5229)
This commit is contained in:
parent
eb5ac23434
commit
fd7f0732fe
22 changed files with 254 additions and 93 deletions
72
app/javascript/mastodon/features/emoji/emoji.js
Normal file
72
app/javascript/mastodon/features/emoji/emoji.js
Normal file
|
@ -0,0 +1,72 @@
|
|||
import unicodeMapping from './emoji_unicode_mapping_light';
|
||||
import Trie from 'substring-trie';
|
||||
|
||||
const trie = new Trie(Object.keys(unicodeMapping));
|
||||
|
||||
const assetHost = process.env.CDN_HOST || '';
|
||||
|
||||
const emojify = (str, customEmojis = {}) => {
|
||||
let rtn = '';
|
||||
for (;;) {
|
||||
let match, i = 0, tag;
|
||||
while (i < str.length && (tag = '<&:'.indexOf(str[i])) === -1 && !(match = trie.search(str.slice(i)))) {
|
||||
i += str.codePointAt(i) < 65536 ? 1 : 2;
|
||||
}
|
||||
let rend, replacement = '';
|
||||
if (i === str.length) {
|
||||
break;
|
||||
} else if (str[i] === ':') {
|
||||
if (!(() => {
|
||||
rend = str.indexOf(':', i + 1) + 1;
|
||||
if (!rend) return false; // no pair of ':'
|
||||
const lt = str.indexOf('<', i + 1);
|
||||
if (!(lt === -1 || lt >= rend)) return false; // tag appeared before closing ':'
|
||||
const shortname = str.slice(i, rend);
|
||||
// now got a replacee as ':shortname:'
|
||||
// if you want additional emoji handler, add statements below which set replacement and return true.
|
||||
if (shortname in customEmojis) {
|
||||
replacement = `<img draggable="false" class="emojione" alt="${shortname}" title="${shortname}" src="${customEmojis[shortname]}" />`;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
})()) rend = ++i;
|
||||
} else if (tag >= 0) { // <, &
|
||||
rend = str.indexOf('>;'[tag], i + 1) + 1;
|
||||
if (!rend) break;
|
||||
i = rend;
|
||||
} else { // matched to unicode emoji
|
||||
const { filename, shortCode } = unicodeMapping[match];
|
||||
const title = shortCode ? `:${shortCode}:` : '';
|
||||
replacement = `<img draggable="false" class="emojione" alt="${match}" title="${title}" src="${assetHost}/emoji/${filename}.svg" />`;
|
||||
rend = i + match.length;
|
||||
}
|
||||
rtn += str.slice(0, i) + replacement;
|
||||
str = str.slice(rend);
|
||||
}
|
||||
return rtn + str;
|
||||
};
|
||||
|
||||
export default emojify;
|
||||
|
||||
export const buildCustomEmojis = customEmojis => {
|
||||
const emojis = [];
|
||||
|
||||
customEmojis.forEach(emoji => {
|
||||
const shortcode = emoji.get('shortcode');
|
||||
const url = emoji.get('static_url');
|
||||
const name = shortcode.replace(':', '');
|
||||
|
||||
emojis.push({
|
||||
id: name,
|
||||
name,
|
||||
short_names: [name],
|
||||
text: '',
|
||||
emoticons: [],
|
||||
keywords: [name],
|
||||
imageUrl: url,
|
||||
custom: true,
|
||||
});
|
||||
});
|
||||
|
||||
return emojis;
|
||||
};
|
90
app/javascript/mastodon/features/emoji/emoji_compressed.js
Normal file
90
app/javascript/mastodon/features/emoji/emoji_compressed.js
Normal file
|
@ -0,0 +1,90 @@
|
|||
// @preval
|
||||
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
|
||||
// This file contains the compressed version of the emoji data from
|
||||
// both emoji_map.json and from emoji-mart's emojiIndex and data objects.
|
||||
// It's designed to be emitted in an array format to take up less space
|
||||
// over the wire.
|
||||
|
||||
const { unicodeToFilename } = require('./unicode_to_filename');
|
||||
const { unicodeToUnifiedName } = require('./unicode_to_unified_name');
|
||||
const emojiMap = require('./emoji_map.json');
|
||||
const { emojiIndex } = require('emoji-mart');
|
||||
const emojiMartData = require('emoji-mart/dist/data').default;
|
||||
const excluded = ['®', '©', '™'];
|
||||
const skins = ['🏻', '🏼', '🏽', '🏾', '🏿'];
|
||||
const shortcodeMap = {};
|
||||
|
||||
const shortCodesToEmojiData = {};
|
||||
const emojisWithoutShortCodes = [];
|
||||
|
||||
Object.keys(emojiIndex.emojis).forEach(key => {
|
||||
shortcodeMap[emojiIndex.emojis[key].native] = emojiIndex.emojis[key].id;
|
||||
});
|
||||
|
||||
const stripModifiers = unicode => {
|
||||
skins.forEach(tone => {
|
||||
unicode = unicode.replace(tone, '');
|
||||
});
|
||||
|
||||
return unicode;
|
||||
};
|
||||
|
||||
Object.keys(emojiMap).forEach(key => {
|
||||
if (excluded.includes(key)) {
|
||||
delete emojiMap[key];
|
||||
return;
|
||||
}
|
||||
|
||||
const normalizedKey = stripModifiers(key);
|
||||
let shortcode = shortcodeMap[normalizedKey];
|
||||
|
||||
if (!shortcode) {
|
||||
shortcode = shortcodeMap[normalizedKey + '\uFE0F'];
|
||||
}
|
||||
|
||||
const filename = emojiMap[key];
|
||||
|
||||
const filenameData = [key];
|
||||
|
||||
if (unicodeToFilename(key) !== filename) {
|
||||
// filename can't be derived using unicodeToFilename
|
||||
filenameData.push(filename);
|
||||
}
|
||||
|
||||
if (typeof shortcode === 'undefined') {
|
||||
emojisWithoutShortCodes.push(filenameData);
|
||||
} else {
|
||||
shortCodesToEmojiData[shortcode] = shortCodesToEmojiData[shortcode] || [[]];
|
||||
shortCodesToEmojiData[shortcode][0].push(filenameData);
|
||||
}
|
||||
});
|
||||
|
||||
Object.keys(emojiIndex.emojis).forEach(key => {
|
||||
const { native } = emojiIndex.emojis[key];
|
||||
const { short_names, search, unified } = emojiMartData.emojis[key];
|
||||
if (short_names[0] !== key) {
|
||||
throw new Error('The compresser expects the first short_code to be the ' +
|
||||
'key. It may need to be rewritten if the emoji change such that this ' +
|
||||
'is no longer the case.');
|
||||
}
|
||||
|
||||
short_names.splice(0, 1); // first short name can be inferred from the key
|
||||
|
||||
const searchData = [native, short_names, search];
|
||||
if (unicodeToUnifiedName(native) !== unified) {
|
||||
// unified name can't be derived from unicodeToUnifiedName
|
||||
searchData.push(unified);
|
||||
}
|
||||
|
||||
shortCodesToEmojiData[key].push(searchData);
|
||||
});
|
||||
|
||||
// JSON.parse/stringify is to emulate what @preval is doing and avoid any
|
||||
// inconsistent behavior in dev mode
|
||||
module.exports = JSON.parse(JSON.stringify([
|
||||
shortCodesToEmojiData,
|
||||
emojiMartData.skins,
|
||||
emojiMartData.categories,
|
||||
emojiMartData.short_names,
|
||||
emojisWithoutShortCodes,
|
||||
]));
|
1
app/javascript/mastodon/features/emoji/emoji_map.json
Normal file
1
app/javascript/mastodon/features/emoji/emoji_map.json
Normal file
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,41 @@
|
|||
// The output of this module is designed to mimic emoji-mart's
|
||||
// "data" object, such that we can use it for a light version of emoji-mart's
|
||||
// emojiIndex.search functionality.
|
||||
const { unicodeToUnifiedName } = require('./unicode_to_unified_name');
|
||||
const [ shortCodesToEmojiData, skins, categories, short_names ] = require('./emoji_compressed');
|
||||
|
||||
const emojis = {};
|
||||
|
||||
// decompress
|
||||
Object.keys(shortCodesToEmojiData).forEach((shortCode) => {
|
||||
let [
|
||||
filenameData, // eslint-disable-line no-unused-vars
|
||||
searchData,
|
||||
] = shortCodesToEmojiData[shortCode];
|
||||
let [
|
||||
native,
|
||||
short_names,
|
||||
search,
|
||||
unified,
|
||||
] = searchData;
|
||||
|
||||
if (!unified) {
|
||||
// unified name can be derived from unicodeToUnifiedName
|
||||
unified = unicodeToUnifiedName(native);
|
||||
}
|
||||
|
||||
short_names = [shortCode].concat(short_names);
|
||||
emojis[shortCode] = {
|
||||
native,
|
||||
search,
|
||||
short_names,
|
||||
unified,
|
||||
};
|
||||
});
|
||||
|
||||
module.exports = {
|
||||
emojis,
|
||||
skins,
|
||||
categories,
|
||||
short_names,
|
||||
};
|
|
@ -0,0 +1,154 @@
|
|||
// This code is largely borrowed from:
|
||||
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/emoji-index.js
|
||||
|
||||
import data from './emoji_mart_data_light';
|
||||
import { getData, getSanitizedData, intersect } from './emoji_utils';
|
||||
|
||||
let index = {};
|
||||
let emojisList = {};
|
||||
let emoticonsList = {};
|
||||
let previousInclude = [];
|
||||
let previousExclude = [];
|
||||
|
||||
for (let emoji in data.emojis) {
|
||||
let emojiData = data.emojis[emoji],
|
||||
{ short_names, emoticons } = emojiData,
|
||||
id = short_names[0];
|
||||
|
||||
for (let emoticon of (emoticons || [])) {
|
||||
if (!emoticonsList[emoticon]) {
|
||||
emoticonsList[emoticon] = id;
|
||||
}
|
||||
}
|
||||
|
||||
emojisList[id] = getSanitizedData(id);
|
||||
}
|
||||
|
||||
function search(value, { emojisToShowFilter, maxResults, include, exclude, custom = [] } = {}) {
|
||||
maxResults = maxResults || 75;
|
||||
include = include || [];
|
||||
exclude = exclude || [];
|
||||
|
||||
if (custom.length) {
|
||||
for (const emoji of custom) {
|
||||
data.emojis[emoji.id] = getData(emoji);
|
||||
emojisList[emoji.id] = getSanitizedData(emoji);
|
||||
}
|
||||
|
||||
data.categories.push({
|
||||
name: 'Custom',
|
||||
emojis: custom.map(emoji => emoji.id),
|
||||
});
|
||||
}
|
||||
|
||||
let results = null;
|
||||
let pool = data.emojis;
|
||||
|
||||
if (value.length) {
|
||||
if (value === '-' || value === '-1') {
|
||||
return [emojisList['-1']];
|
||||
}
|
||||
|
||||
let values = value.toLowerCase().split(/[\s|,|\-|_]+/);
|
||||
|
||||
if (values.length > 2) {
|
||||
values = [values[0], values[1]];
|
||||
}
|
||||
|
||||
if (include.length || exclude.length) {
|
||||
pool = {};
|
||||
|
||||
if (previousInclude !== include.sort().join(',') || previousExclude !== exclude.sort().join(',')) {
|
||||
previousInclude = include.sort().join(',');
|
||||
previousExclude = exclude.sort().join(',');
|
||||
index = {};
|
||||
}
|
||||
|
||||
for (let category of data.categories) {
|
||||
let isIncluded = include && include.length ? include.indexOf(category.name.toLowerCase()) > -1 : true;
|
||||
let isExcluded = exclude && exclude.length ? exclude.indexOf(category.name.toLowerCase()) > -1 : false;
|
||||
if (!isIncluded || isExcluded) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (let emojiId of category.emojis) {
|
||||
pool[emojiId] = data.emojis[emojiId];
|
||||
}
|
||||
}
|
||||
} else if (previousInclude.length || previousExclude.length) {
|
||||
index = {};
|
||||
}
|
||||
|
||||
let allResults = values.map((value) => {
|
||||
let aPool = pool;
|
||||
let aIndex = index;
|
||||
let length = 0;
|
||||
|
||||
for (let char of value.split('')) {
|
||||
length++;
|
||||
|
||||
aIndex[char] = aIndex[char] || {};
|
||||
aIndex = aIndex[char];
|
||||
|
||||
if (!aIndex.results) {
|
||||
let scores = {};
|
||||
|
||||
aIndex.results = [];
|
||||
aIndex.pool = {};
|
||||
|
||||
for (let id in aPool) {
|
||||
let emoji = aPool[id],
|
||||
{ search } = emoji,
|
||||
sub = value.substr(0, length),
|
||||
subIndex = search.indexOf(sub);
|
||||
|
||||
if (subIndex !== -1) {
|
||||
let score = subIndex + 1;
|
||||
if (sub === id) {
|
||||
score = 0;
|
||||
}
|
||||
|
||||
aIndex.results.push(emojisList[id]);
|
||||
aIndex.pool[id] = emoji;
|
||||
|
||||
scores[id] = score;
|
||||
}
|
||||
}
|
||||
|
||||
aIndex.results.sort((a, b) => {
|
||||
let aScore = scores[a.id],
|
||||
bScore = scores[b.id];
|
||||
|
||||
return aScore - bScore;
|
||||
});
|
||||
}
|
||||
|
||||
aPool = aIndex.pool;
|
||||
}
|
||||
|
||||
return aIndex.results;
|
||||
}).filter(a => a);
|
||||
|
||||
if (allResults.length > 1) {
|
||||
results = intersect(...allResults);
|
||||
} else if (allResults.length) {
|
||||
results = allResults[0];
|
||||
} else {
|
||||
results = [];
|
||||
}
|
||||
}
|
||||
|
||||
if (results) {
|
||||
if (emojisToShowFilter) {
|
||||
results = results.filter((result) => emojisToShowFilter(data.emojis[result.id].unified));
|
||||
}
|
||||
|
||||
if (results && results.length > maxResults) {
|
||||
results = results.slice(0, maxResults);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
export { search };
|
|
@ -0,0 +1,35 @@
|
|||
// A mapping of unicode strings to an object containing the filename
|
||||
// (i.e. the svg filename) and a shortCode intended to be shown
|
||||
// as a "title" attribute in an HTML element (aka tooltip).
|
||||
|
||||
const [
|
||||
shortCodesToEmojiData,
|
||||
skins, // eslint-disable-line no-unused-vars
|
||||
categories, // eslint-disable-line no-unused-vars
|
||||
short_names, // eslint-disable-line no-unused-vars
|
||||
emojisWithoutShortCodes,
|
||||
] = require('./emoji_compressed');
|
||||
const { unicodeToFilename } = require('./unicode_to_filename');
|
||||
|
||||
// decompress
|
||||
const unicodeMapping = {};
|
||||
|
||||
function processEmojiMapData(emojiMapData, shortCode) {
|
||||
let [ native, filename ] = emojiMapData;
|
||||
if (!filename) {
|
||||
// filename name can be derived from unicodeToFilename
|
||||
filename = unicodeToFilename(native);
|
||||
}
|
||||
unicodeMapping[native] = {
|
||||
shortCode: shortCode,
|
||||
filename: filename,
|
||||
};
|
||||
}
|
||||
|
||||
Object.keys(shortCodesToEmojiData).forEach((shortCode) => {
|
||||
let [ filenameData ] = shortCodesToEmojiData[shortCode];
|
||||
filenameData.forEach(emojiMapData => processEmojiMapData(emojiMapData, shortCode));
|
||||
});
|
||||
emojisWithoutShortCodes.forEach(emojiMapData => processEmojiMapData(emojiMapData));
|
||||
|
||||
module.exports = unicodeMapping;
|
137
app/javascript/mastodon/features/emoji/emoji_utils.js
Normal file
137
app/javascript/mastodon/features/emoji/emoji_utils.js
Normal file
|
@ -0,0 +1,137 @@
|
|||
// This code is largely borrowed from:
|
||||
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/index.js
|
||||
|
||||
import data from './emoji_mart_data_light';
|
||||
|
||||
const COLONS_REGEX = /^(?:\:([^\:]+)\:)(?:\:skin-tone-(\d)\:)?$/;
|
||||
|
||||
function buildSearch(thisData) {
|
||||
const search = [];
|
||||
|
||||
let addToSearch = (strings, split) => {
|
||||
if (!strings) {
|
||||
return;
|
||||
}
|
||||
|
||||
(Array.isArray(strings) ? strings : [strings]).forEach((string) => {
|
||||
(split ? string.split(/[-|_|\s]+/) : [string]).forEach((s) => {
|
||||
s = s.toLowerCase();
|
||||
|
||||
if (search.indexOf(s) === -1) {
|
||||
search.push(s);
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
addToSearch(thisData.short_names, true);
|
||||
addToSearch(thisData.name, true);
|
||||
addToSearch(thisData.keywords, false);
|
||||
addToSearch(thisData.emoticons, false);
|
||||
|
||||
return search;
|
||||
}
|
||||
|
||||
function unifiedToNative(unified) {
|
||||
let unicodes = unified.split('-'),
|
||||
codePoints = unicodes.map((u) => `0x${u}`);
|
||||
|
||||
return String.fromCodePoint(...codePoints);
|
||||
}
|
||||
|
||||
function sanitize(emoji) {
|
||||
let { name, short_names, skin_tone, skin_variations, emoticons, unified, custom, imageUrl } = emoji,
|
||||
id = emoji.id || short_names[0],
|
||||
colons = `:${id}:`;
|
||||
|
||||
if (custom) {
|
||||
return {
|
||||
id,
|
||||
name,
|
||||
colons,
|
||||
emoticons,
|
||||
custom,
|
||||
imageUrl,
|
||||
};
|
||||
}
|
||||
|
||||
if (skin_tone) {
|
||||
colons += `:skin-tone-${skin_tone}:`;
|
||||
}
|
||||
|
||||
return {
|
||||
id,
|
||||
name,
|
||||
colons,
|
||||
emoticons,
|
||||
unified: unified.toLowerCase(),
|
||||
skin: skin_tone || (skin_variations ? 1 : null),
|
||||
native: unifiedToNative(unified),
|
||||
};
|
||||
}
|
||||
|
||||
function getSanitizedData(emoji) {
|
||||
return sanitize(getData(emoji));
|
||||
}
|
||||
|
||||
function getData(emoji) {
|
||||
let emojiData = {};
|
||||
|
||||
if (typeof emoji === 'string') {
|
||||
let matches = emoji.match(COLONS_REGEX);
|
||||
|
||||
if (matches) {
|
||||
emoji = matches[1];
|
||||
|
||||
}
|
||||
|
||||
if (data.short_names.hasOwnProperty(emoji)) {
|
||||
emoji = data.short_names[emoji];
|
||||
}
|
||||
|
||||
if (data.emojis.hasOwnProperty(emoji)) {
|
||||
emojiData = data.emojis[emoji];
|
||||
}
|
||||
} else if (emoji.custom) {
|
||||
emojiData = emoji;
|
||||
|
||||
emojiData.search = buildSearch({
|
||||
short_names: emoji.short_names,
|
||||
name: emoji.name,
|
||||
keywords: emoji.keywords,
|
||||
emoticons: emoji.emoticons,
|
||||
});
|
||||
|
||||
emojiData.search = emojiData.search.join(',');
|
||||
} else if (emoji.id) {
|
||||
if (data.short_names.hasOwnProperty(emoji.id)) {
|
||||
emoji.id = data.short_names[emoji.id];
|
||||
}
|
||||
|
||||
if (data.emojis.hasOwnProperty(emoji.id)) {
|
||||
emojiData = data.emojis[emoji.id];
|
||||
}
|
||||
}
|
||||
|
||||
emojiData.emoticons = emojiData.emoticons || [];
|
||||
emojiData.variations = emojiData.variations || [];
|
||||
|
||||
if (emojiData.variations && emojiData.variations.length) {
|
||||
emojiData = JSON.parse(JSON.stringify(emojiData));
|
||||
emojiData.unified = emojiData.variations.shift();
|
||||
}
|
||||
|
||||
return emojiData;
|
||||
}
|
||||
|
||||
function intersect(a, b) {
|
||||
let aSet = new Set(a);
|
||||
let bSet = new Set(b);
|
||||
let intersection = new Set(
|
||||
[...aSet].filter(x => bSet.has(x))
|
||||
);
|
||||
|
||||
return Array.from(intersection);
|
||||
}
|
||||
|
||||
export { getData, getSanitizedData, intersect };
|
|
@ -0,0 +1,26 @@
|
|||
// taken from:
|
||||
// https://github.com/twitter/twemoji/blob/47732c7/twemoji-generator.js#L848-L866
|
||||
exports.unicodeToFilename = (str) => {
|
||||
let result = '';
|
||||
let charCode = 0;
|
||||
let p = 0;
|
||||
let i = 0;
|
||||
while (i < str.length) {
|
||||
charCode = str.charCodeAt(i++);
|
||||
if (p) {
|
||||
if (result.length > 0) {
|
||||
result += '-';
|
||||
}
|
||||
result += (0x10000 + ((p - 0xD800) << 10) + (charCode - 0xDC00)).toString(16);
|
||||
p = 0;
|
||||
} else if (0xD800 <= charCode && charCode <= 0xDBFF) {
|
||||
p = charCode;
|
||||
} else {
|
||||
if (result.length > 0) {
|
||||
result += '-';
|
||||
}
|
||||
result += charCode.toString(16);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
|
@ -0,0 +1,17 @@
|
|||
function padLeft(str, num) {
|
||||
while (str.length < num) {
|
||||
str = '0' + str;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
exports.unicodeToUnifiedName = (str) => {
|
||||
let output = '';
|
||||
for (let i = 0; i < str.length; i += 2) {
|
||||
if (i > 0) {
|
||||
output += '-';
|
||||
}
|
||||
output += padLeft(str.codePointAt(i).toString(16).toUpperCase(), 4);
|
||||
}
|
||||
return output;
|
||||
};
|
Loading…
Add table
Add a link
Reference in a new issue