0
0
Fork 0

Change how hashtags are normalized (#18795)

* Change how hashtags are normalized

* Fix tests
This commit is contained in:
Eugen Rochko 2022-07-13 15:03:28 +02:00 committed by GitHub
parent 12ed2d793b
commit e7aa2be828
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 193 additions and 51 deletions

View file

@ -892,6 +892,34 @@ const startWorker = async (workerId) => {
return arr;
};
/**
* See app/lib/ascii_folder.rb for the canon definitions
* of these constants
*/
const NON_ASCII_CHARS = 'ÀÁÂÃÄÅàáâãäåĀāĂ㥹ÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňʼnŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž';
const EQUIVALENT_ASCII_CHARS = 'AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz';
/**
* @param {string} str
* @return {string}
*/
const foldToASCII = str => {
const regex = new RegExp(NON_ASCII_CHARS.split('').join('|'), 'g');
return str.replace(regex, match => {
const index = NON_ASCII_CHARS.indexOf(match);
return EQUIVALENT_ASCII_CHARS[index];
});
};
/**
* @param {string} str
* @return {string}
*/
const normalizeHashtag = str => {
return foldToASCII(str.normalize('NFKC').toLowerCase()).replace(/[^\p{L}\p{N}_\u00b7\u200c]/gu, '');
};
/**
* @param {any} req
* @param {string} name
@ -968,7 +996,7 @@ const startWorker = async (workerId) => {
reject('No tag for stream provided');
} else {
resolve({
channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}`],
channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}`],
options: { needsFiltering: true },
});
}
@ -979,7 +1007,7 @@ const startWorker = async (workerId) => {
reject('No tag for stream provided');
} else {
resolve({
channelIds: [`timeline:hashtag:${params.tag.toLowerCase()}:local`],
channelIds: [`timeline:hashtag:${normalizeHashtag(params.tag)}:local`],
options: { needsFiltering: true },
});
}