0
0
Fork 0

Compress and combine emoji data (#5229)

This commit is contained in:
Nolan Lawson 2017-10-05 18:42:34 -07:00 committed by Eugen Rochko
parent eb5ac23434
commit fd7f0732fe
22 changed files with 254 additions and 93 deletions

View file

@ -6,7 +6,7 @@ import Overlay from 'react-overlays/lib/Overlay';
import classNames from 'classnames';
import ImmutablePropTypes from 'react-immutable-proptypes';
import detectPassiveEvents from 'detect-passive-events';
import { buildCustomEmojis } from '../../../emoji';
import { buildCustomEmojis } from '../../emoji/emoji';
const messages = defineMessages({
emoji: { id: 'emoji_button.label', defaultMessage: 'Insert emoji' },

View file

@ -0,0 +1,72 @@
import unicodeMapping from './emoji_unicode_mapping_light';
import Trie from 'substring-trie';
const trie = new Trie(Object.keys(unicodeMapping));
const assetHost = process.env.CDN_HOST || '';
const emojify = (str, customEmojis = {}) => {
let rtn = '';
for (;;) {
let match, i = 0, tag;
while (i < str.length && (tag = '<&:'.indexOf(str[i])) === -1 && !(match = trie.search(str.slice(i)))) {
i += str.codePointAt(i) < 65536 ? 1 : 2;
}
let rend, replacement = '';
if (i === str.length) {
break;
} else if (str[i] === ':') {
if (!(() => {
rend = str.indexOf(':', i + 1) + 1;
if (!rend) return false; // no pair of ':'
const lt = str.indexOf('<', i + 1);
if (!(lt === -1 || lt >= rend)) return false; // tag appeared before closing ':'
const shortname = str.slice(i, rend);
// now got a replacee as ':shortname:'
// if you want additional emoji handler, add statements below which set replacement and return true.
if (shortname in customEmojis) {
replacement = `<img draggable="false" class="emojione" alt="${shortname}" title="${shortname}" src="${customEmojis[shortname]}" />`;
return true;
}
return false;
})()) rend = ++i;
} else if (tag >= 0) { // <, &
rend = str.indexOf('>;'[tag], i + 1) + 1;
if (!rend) break;
i = rend;
} else { // matched to unicode emoji
const { filename, shortCode } = unicodeMapping[match];
const title = shortCode ? `:${shortCode}:` : '';
replacement = `<img draggable="false" class="emojione" alt="${match}" title="${title}" src="${assetHost}/emoji/${filename}.svg" />`;
rend = i + match.length;
}
rtn += str.slice(0, i) + replacement;
str = str.slice(rend);
}
return rtn + str;
};
export default emojify;
export const buildCustomEmojis = customEmojis => {
const emojis = [];
customEmojis.forEach(emoji => {
const shortcode = emoji.get('shortcode');
const url = emoji.get('static_url');
const name = shortcode.replace(':', '');
emojis.push({
id: name,
name,
short_names: [name],
text: '',
emoticons: [],
keywords: [name],
imageUrl: url,
custom: true,
});
});
return emojis;
};

View file

@ -0,0 +1,90 @@
// @preval
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
// This file contains the compressed version of the emoji data from
// both emoji_map.json and from emoji-mart's emojiIndex and data objects.
// It's designed to be emitted in an array format to take up less space
// over the wire.
const { unicodeToFilename } = require('./unicode_to_filename');
const { unicodeToUnifiedName } = require('./unicode_to_unified_name');
const emojiMap = require('./emoji_map.json');
const { emojiIndex } = require('emoji-mart');
const emojiMartData = require('emoji-mart/dist/data').default;
const excluded = ['®', '©', '™'];
const skins = ['🏻', '🏼', '🏽', '🏾', '🏿'];
const shortcodeMap = {};
const shortCodesToEmojiData = {};
const emojisWithoutShortCodes = [];
Object.keys(emojiIndex.emojis).forEach(key => {
shortcodeMap[emojiIndex.emojis[key].native] = emojiIndex.emojis[key].id;
});
const stripModifiers = unicode => {
skins.forEach(tone => {
unicode = unicode.replace(tone, '');
});
return unicode;
};
Object.keys(emojiMap).forEach(key => {
if (excluded.includes(key)) {
delete emojiMap[key];
return;
}
const normalizedKey = stripModifiers(key);
let shortcode = shortcodeMap[normalizedKey];
if (!shortcode) {
shortcode = shortcodeMap[normalizedKey + '\uFE0F'];
}
const filename = emojiMap[key];
const filenameData = [key];
if (unicodeToFilename(key) !== filename) {
// filename can't be derived using unicodeToFilename
filenameData.push(filename);
}
if (typeof shortcode === 'undefined') {
emojisWithoutShortCodes.push(filenameData);
} else {
shortCodesToEmojiData[shortcode] = shortCodesToEmojiData[shortcode] || [[]];
shortCodesToEmojiData[shortcode][0].push(filenameData);
}
});
Object.keys(emojiIndex.emojis).forEach(key => {
const { native } = emojiIndex.emojis[key];
const { short_names, search, unified } = emojiMartData.emojis[key];
if (short_names[0] !== key) {
throw new Error('The compresser expects the first short_code to be the ' +
'key. It may need to be rewritten if the emoji change such that this ' +
'is no longer the case.');
}
short_names.splice(0, 1); // first short name can be inferred from the key
const searchData = [native, short_names, search];
if (unicodeToUnifiedName(native) !== unified) {
// unified name can't be derived from unicodeToUnifiedName
searchData.push(unified);
}
shortCodesToEmojiData[key].push(searchData);
});
// JSON.parse/stringify is to emulate what @preval is doing and avoid any
// inconsistent behavior in dev mode
module.exports = JSON.parse(JSON.stringify([
shortCodesToEmojiData,
emojiMartData.skins,
emojiMartData.categories,
emojiMartData.short_names,
emojisWithoutShortCodes,
]));

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,41 @@
// The output of this module is designed to mimic emoji-mart's
// "data" object, such that we can use it for a light version of emoji-mart's
// emojiIndex.search functionality.
const { unicodeToUnifiedName } = require('./unicode_to_unified_name');
const [ shortCodesToEmojiData, skins, categories, short_names ] = require('./emoji_compressed');
const emojis = {};
// decompress
Object.keys(shortCodesToEmojiData).forEach((shortCode) => {
let [
filenameData, // eslint-disable-line no-unused-vars
searchData,
] = shortCodesToEmojiData[shortCode];
let [
native,
short_names,
search,
unified,
] = searchData;
if (!unified) {
// unified name can be derived from unicodeToUnifiedName
unified = unicodeToUnifiedName(native);
}
short_names = [shortCode].concat(short_names);
emojis[shortCode] = {
native,
search,
short_names,
unified,
};
});
module.exports = {
emojis,
skins,
categories,
short_names,
};

View file

@ -0,0 +1,154 @@
// This code is largely borrowed from:
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/emoji-index.js
import data from './emoji_mart_data_light';
import { getData, getSanitizedData, intersect } from './emoji_utils';
let index = {};
let emojisList = {};
let emoticonsList = {};
let previousInclude = [];
let previousExclude = [];
for (let emoji in data.emojis) {
let emojiData = data.emojis[emoji],
{ short_names, emoticons } = emojiData,
id = short_names[0];
for (let emoticon of (emoticons || [])) {
if (!emoticonsList[emoticon]) {
emoticonsList[emoticon] = id;
}
}
emojisList[id] = getSanitizedData(id);
}
function search(value, { emojisToShowFilter, maxResults, include, exclude, custom = [] } = {}) {
maxResults = maxResults || 75;
include = include || [];
exclude = exclude || [];
if (custom.length) {
for (const emoji of custom) {
data.emojis[emoji.id] = getData(emoji);
emojisList[emoji.id] = getSanitizedData(emoji);
}
data.categories.push({
name: 'Custom',
emojis: custom.map(emoji => emoji.id),
});
}
let results = null;
let pool = data.emojis;
if (value.length) {
if (value === '-' || value === '-1') {
return [emojisList['-1']];
}
let values = value.toLowerCase().split(/[\s|,|\-|_]+/);
if (values.length > 2) {
values = [values[0], values[1]];
}
if (include.length || exclude.length) {
pool = {};
if (previousInclude !== include.sort().join(',') || previousExclude !== exclude.sort().join(',')) {
previousInclude = include.sort().join(',');
previousExclude = exclude.sort().join(',');
index = {};
}
for (let category of data.categories) {
let isIncluded = include && include.length ? include.indexOf(category.name.toLowerCase()) > -1 : true;
let isExcluded = exclude && exclude.length ? exclude.indexOf(category.name.toLowerCase()) > -1 : false;
if (!isIncluded || isExcluded) {
continue;
}
for (let emojiId of category.emojis) {
pool[emojiId] = data.emojis[emojiId];
}
}
} else if (previousInclude.length || previousExclude.length) {
index = {};
}
let allResults = values.map((value) => {
let aPool = pool;
let aIndex = index;
let length = 0;
for (let char of value.split('')) {
length++;
aIndex[char] = aIndex[char] || {};
aIndex = aIndex[char];
if (!aIndex.results) {
let scores = {};
aIndex.results = [];
aIndex.pool = {};
for (let id in aPool) {
let emoji = aPool[id],
{ search } = emoji,
sub = value.substr(0, length),
subIndex = search.indexOf(sub);
if (subIndex !== -1) {
let score = subIndex + 1;
if (sub === id) {
score = 0;
}
aIndex.results.push(emojisList[id]);
aIndex.pool[id] = emoji;
scores[id] = score;
}
}
aIndex.results.sort((a, b) => {
let aScore = scores[a.id],
bScore = scores[b.id];
return aScore - bScore;
});
}
aPool = aIndex.pool;
}
return aIndex.results;
}).filter(a => a);
if (allResults.length > 1) {
results = intersect(...allResults);
} else if (allResults.length) {
results = allResults[0];
} else {
results = [];
}
}
if (results) {
if (emojisToShowFilter) {
results = results.filter((result) => emojisToShowFilter(data.emojis[result.id].unified));
}
if (results && results.length > maxResults) {
results = results.slice(0, maxResults);
}
}
return results;
}
export { search };

View file

@ -0,0 +1,35 @@
// A mapping of unicode strings to an object containing the filename
// (i.e. the svg filename) and a shortCode intended to be shown
// as a "title" attribute in an HTML element (aka tooltip).
const [
shortCodesToEmojiData,
skins, // eslint-disable-line no-unused-vars
categories, // eslint-disable-line no-unused-vars
short_names, // eslint-disable-line no-unused-vars
emojisWithoutShortCodes,
] = require('./emoji_compressed');
const { unicodeToFilename } = require('./unicode_to_filename');
// decompress
const unicodeMapping = {};
function processEmojiMapData(emojiMapData, shortCode) {
let [ native, filename ] = emojiMapData;
if (!filename) {
// filename name can be derived from unicodeToFilename
filename = unicodeToFilename(native);
}
unicodeMapping[native] = {
shortCode: shortCode,
filename: filename,
};
}
Object.keys(shortCodesToEmojiData).forEach((shortCode) => {
let [ filenameData ] = shortCodesToEmojiData[shortCode];
filenameData.forEach(emojiMapData => processEmojiMapData(emojiMapData, shortCode));
});
emojisWithoutShortCodes.forEach(emojiMapData => processEmojiMapData(emojiMapData));
module.exports = unicodeMapping;

View file

@ -0,0 +1,137 @@
// This code is largely borrowed from:
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/index.js
import data from './emoji_mart_data_light';
const COLONS_REGEX = /^(?:\:([^\:]+)\:)(?:\:skin-tone-(\d)\:)?$/;
function buildSearch(thisData) {
const search = [];
let addToSearch = (strings, split) => {
if (!strings) {
return;
}
(Array.isArray(strings) ? strings : [strings]).forEach((string) => {
(split ? string.split(/[-|_|\s]+/) : [string]).forEach((s) => {
s = s.toLowerCase();
if (search.indexOf(s) === -1) {
search.push(s);
}
});
});
};
addToSearch(thisData.short_names, true);
addToSearch(thisData.name, true);
addToSearch(thisData.keywords, false);
addToSearch(thisData.emoticons, false);
return search;
}
function unifiedToNative(unified) {
let unicodes = unified.split('-'),
codePoints = unicodes.map((u) => `0x${u}`);
return String.fromCodePoint(...codePoints);
}
function sanitize(emoji) {
let { name, short_names, skin_tone, skin_variations, emoticons, unified, custom, imageUrl } = emoji,
id = emoji.id || short_names[0],
colons = `:${id}:`;
if (custom) {
return {
id,
name,
colons,
emoticons,
custom,
imageUrl,
};
}
if (skin_tone) {
colons += `:skin-tone-${skin_tone}:`;
}
return {
id,
name,
colons,
emoticons,
unified: unified.toLowerCase(),
skin: skin_tone || (skin_variations ? 1 : null),
native: unifiedToNative(unified),
};
}
function getSanitizedData(emoji) {
return sanitize(getData(emoji));
}
function getData(emoji) {
let emojiData = {};
if (typeof emoji === 'string') {
let matches = emoji.match(COLONS_REGEX);
if (matches) {
emoji = matches[1];
}
if (data.short_names.hasOwnProperty(emoji)) {
emoji = data.short_names[emoji];
}
if (data.emojis.hasOwnProperty(emoji)) {
emojiData = data.emojis[emoji];
}
} else if (emoji.custom) {
emojiData = emoji;
emojiData.search = buildSearch({
short_names: emoji.short_names,
name: emoji.name,
keywords: emoji.keywords,
emoticons: emoji.emoticons,
});
emojiData.search = emojiData.search.join(',');
} else if (emoji.id) {
if (data.short_names.hasOwnProperty(emoji.id)) {
emoji.id = data.short_names[emoji.id];
}
if (data.emojis.hasOwnProperty(emoji.id)) {
emojiData = data.emojis[emoji.id];
}
}
emojiData.emoticons = emojiData.emoticons || [];
emojiData.variations = emojiData.variations || [];
if (emojiData.variations && emojiData.variations.length) {
emojiData = JSON.parse(JSON.stringify(emojiData));
emojiData.unified = emojiData.variations.shift();
}
return emojiData;
}
function intersect(a, b) {
let aSet = new Set(a);
let bSet = new Set(b);
let intersection = new Set(
[...aSet].filter(x => bSet.has(x))
);
return Array.from(intersection);
}
export { getData, getSanitizedData, intersect };

View file

@ -0,0 +1,26 @@
// taken from:
// https://github.com/twitter/twemoji/blob/47732c7/twemoji-generator.js#L848-L866
exports.unicodeToFilename = (str) => {
let result = '';
let charCode = 0;
let p = 0;
let i = 0;
while (i < str.length) {
charCode = str.charCodeAt(i++);
if (p) {
if (result.length > 0) {
result += '-';
}
result += (0x10000 + ((p - 0xD800) << 10) + (charCode - 0xDC00)).toString(16);
p = 0;
} else if (0xD800 <= charCode && charCode <= 0xDBFF) {
p = charCode;
} else {
if (result.length > 0) {
result += '-';
}
result += charCode.toString(16);
}
}
return result;
};

View file

@ -0,0 +1,17 @@
function padLeft(str, num) {
while (str.length < num) {
str = '0' + str;
}
return str;
}
exports.unicodeToUnifiedName = (str) => {
let output = '';
for (let i = 0; i < str.length; i += 2) {
if (i > 0) {
output += '-';
}
output += padLeft(str.codePointAt(i).toString(16).toUpperCase(), 4);
}
return output;
};