From 71edc65d0d90c2152c91277ee4d45af2c27423a5 Mon Sep 17 00:00:00 2001 From: syuilo Date: Sat, 7 Oct 2023 12:05:17 +0900 Subject: [PATCH] enhance(backend): improve hashtags/trend performance --- CHANGELOG.md | 1 + packages/backend/src/core/FeaturedService.ts | 11 ++ packages/backend/src/core/HashtagService.ts | 101 +++++++++++++++- .../server/api/endpoints/hashtags/trend.ts | 110 ++---------------- 4 files changed, 121 insertions(+), 102 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9ccaa9f1..c4da82e0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ ### Server - Enhance: タイムライン取得時のパフォーマンスを大幅に向上 - Enhance: ハイライト取得時のパフォーマンスを大幅に向上 +- Enhance: トレンドハッシュタグ取得時のパフォーマンスを大幅に向上 - Enhance: 不要なPostgreSQLのインデックスを削除しパフォーマンスを向上 ## 2023.9.3 diff --git a/packages/backend/src/core/FeaturedService.ts b/packages/backend/src/core/FeaturedService.ts index 945c23b0e..62b50ed38 100644 --- a/packages/backend/src/core/FeaturedService.ts +++ b/packages/backend/src/core/FeaturedService.ts @@ -11,6 +11,7 @@ import { bindThis } from '@/decorators.js'; const GLOBAL_NOTES_RANKING_WINDOW = 1000 * 60 * 60 * 24 * 3; // 3日ごと const PER_USER_NOTES_RANKING_WINDOW = 1000 * 60 * 60 * 24 * 7; // 1週間ごと +const HASHTAG_RANKING_WINDOW = 1000 * 60 * 60; // every 1 hour @Injectable() export class FeaturedService { @@ -88,6 +89,11 @@ export class FeaturedService { return this.updateRankingOf(`featuredPerUserNotesRanking:${userId}`, PER_USER_NOTES_RANKING_WINDOW, noteId, score); } + @bindThis + public updateHashtagsRanking(hashtag: string, score = 1): Promise { + return this.updateRankingOf('featuredHashtagsRanking', HASHTAG_RANKING_WINDOW, hashtag, score); + } + @bindThis public getGlobalNotesRanking(limit: number): Promise { return this.getRankingOf('featuredGlobalNotesRanking', GLOBAL_NOTES_RANKING_WINDOW, limit); @@ -102,4 +108,9 @@ export class FeaturedService { public getPerUserNotesRanking(userId: MiUser['id'], limit: number): Promise { return 
this.getRankingOf(`featuredPerUserNotesRanking:${userId}`, PER_USER_NOTES_RANKING_WINDOW, limit); } + + @bindThis + public getHashtagsRanking(limit: number): Promise { + return this.getRankingOf('featuredHashtagsRanking', HASHTAG_RANKING_WINDOW, limit); + } } diff --git a/packages/backend/src/core/HashtagService.ts b/packages/backend/src/core/HashtagService.ts index c72c7460f..4900fa90a 100644 --- a/packages/backend/src/core/HashtagService.ts +++ b/packages/backend/src/core/HashtagService.ts @@ -4,6 +4,8 @@ */ import { Inject, Injectable } from '@nestjs/common'; +import * as Redis from 'ioredis'; +import { getLineAndCharacterOfPosition } from 'typescript'; // NOTE(review): not referenced by any line visible in this patch - possibly an accidental auto-import; confirm import { DI } from '@/di-symbols.js'; import type { MiUser } from '@/models/User.js'; import { normalizeForSearch } from '@/misc/normalize-for-search.js'; @@ -12,14 +14,19 @@ import type { MiHashtag } from '@/models/Hashtag.js'; import type { HashtagsRepository } from '@/models/_.js'; import { UserEntityService } from '@/core/entities/UserEntityService.js'; import { bindThis } from '@/decorators.js'; +import { FeaturedService } from '@/core/FeaturedService.js'; @Injectable() export class HashtagService { constructor( + @Inject(DI.redis) + private redisClient: Redis.Redis, // TODO: allow a dedicated Redis server to be configured + @Inject(DI.hashtagsRepository) private hashtagsRepository: HashtagsRepository, private userEntityService: UserEntityService, + private featuredService: FeaturedService, private idService: IdService, ) { } @@ -46,6 +53,9 @@ export class HashtagService { public async updateHashtag(user: { id: MiUser['id']; host: MiUser['host']; }, tag: string, isUserAttached = false, inc = true) { tag = normalizeForSearch(tag); + // TODO: sampling + this.updateHashtagsRanking(tag, user.id); // NOTE(review): not awaited, and it runs before the `inc` check below - confirm both are intended + const index = await this.hashtagsRepository.findOneBy({ name: tag }); if (index == null && !inc) return; @@ -85,7 +95,7 @@ export class HashtagService { } } } else { - // 自分が初めてこのタグを使ったなら + // if this is the first time this user has used this tag if 
(!index.mentionedUserIds.some(id => id === user.id)) { set.mentionedUserIds = () => `array_append("mentionedUserIds", '${user.id}')`; set.mentionedUsersCount = () => '"mentionedUsersCount" + 1'; @@ -144,4 +154,93 @@ export class HashtagService { } } } + + @bindThis + public async updateHashtagsRanking(hashtag: string, userId: MiUser['id']): Promise<void> { + // TODO: take instance.hiddenTags into account + + // YYYYMMDDHHmm (10-minute intervals) + const now = new Date(); + now.setMinutes(Math.floor(now.getMinutes() / 10) * 10, 0, 0); + const window = `${now.getUTCFullYear()}${(now.getUTCMonth() + 1).toString().padStart(2, '0')}${now.getUTCDate().toString().padStart(2, '0')}${now.getUTCHours().toString().padStart(2, '0')}${now.getUTCMinutes().toString().padStart(2, '0')}`; + + const exist = await this.redisClient.sismember(`hashtagUsers:${hashtag}`, userId); + if (exist === 1) return; + + this.featuredService.updateHashtagsRanking(hashtag, 1); + + const redisPipeline = this.redisClient.pipeline(); + + // TODO: these Sets could probably be replaced with a Bloom filter + + // for the chart + redisPipeline.sadd(`hashtagUsers:${hashtag}:${window}`, userId); + redisPipeline.expire(`hashtagUsers:${hashtag}:${window}`, + 60 * 60 * 24 * 3, // 3 days + 'NX', // "NX -- Set expiry only when the key has no expiry" + ); + + // for the unique-user count + redisPipeline.sadd(`hashtagUsers:${hashtag}`, userId); + redisPipeline.expire(`hashtagUsers:${hashtag}`, + 60 * 60, // 1 hour + 'NX', // "NX -- Set expiry only when the key has no expiry" + ); + + redisPipeline.exec(); + } + + @bindThis + public async getChart(hashtag: string, range: number): Promise<number[]> { // one SCARD result per 10-minute window, current window first + const now = new Date(); + now.setMinutes(Math.floor(now.getMinutes() / 10) * 10, 0, 0); + + const redisPipeline = this.redisClient.pipeline(); + + for (let i = 0; i < range; i++) { + const window = `${now.getUTCFullYear()}${(now.getUTCMonth() + 1).toString().padStart(2, '0')}${now.getUTCDate().toString().padStart(2, '0')}${now.getUTCHours().toString().padStart(2, 
'0')}${now.getUTCMinutes().toString().padStart(2, '0')}`; + redisPipeline.scard(`hashtagUsers:${hashtag}:${window}`); + now.setMinutes(now.getMinutes() - 10, 0, 0); // step back exactly one window; an "i * 10" step grows each pass and repeats/skips windows + } + + const result = await redisPipeline.exec(); + + if (result == null) return []; + + return result.map(x => x[1]) as number[]; + } + + @bindThis + public async getCharts(hashtags: string[], range: number): Promise<Record<string, number[]>> { // same as getChart for several hashtags, batched into one pipeline + const now = new Date(); + now.setMinutes(Math.floor(now.getMinutes() / 10) * 10, 0, 0); + + const redisPipeline = this.redisClient.pipeline(); + + for (let i = 0; i < range; i++) { + const window = `${now.getUTCFullYear()}${(now.getUTCMonth() + 1).toString().padStart(2, '0')}${now.getUTCDate().toString().padStart(2, '0')}${now.getUTCHours().toString().padStart(2, '0')}${now.getUTCMinutes().toString().padStart(2, '0')}`; + for (const hashtag of hashtags) { + redisPipeline.scard(`hashtagUsers:${hashtag}:${window}`); + } + now.setMinutes(now.getMinutes() - 10, 0, 0); // step back exactly one window; an "i * 10" step grows each pass and repeats/skips windows + } + + const result = await redisPipeline.exec(); + + if (result == null) return {}; + + // key is hashtag + const charts = {} as Record<string, number[]>; + for (const hashtag of hashtags) { + charts[hashtag] = []; + } + + for (let i = 0; i < range; i++) { + for (let j = 0; j < hashtags.length; j++) { + charts[hashtags[j]].push(result[(i * hashtags.length) + j][1] as number); + } + } + + return charts; + } } diff --git a/packages/backend/src/server/api/endpoints/hashtags/trend.ts b/packages/backend/src/server/api/endpoints/hashtags/trend.ts index 75d4fe381..a69e007a4 100644 --- a/packages/backend/src/server/api/endpoints/hashtags/trend.ts +++ b/packages/backend/src/server/api/endpoints/hashtags/trend.ts @@ -3,29 +3,13 @@ * SPDX-License-Identifier: AGPL-3.0-only */ -import { Brackets } from 'typeorm'; import { Inject, Injectable } from '@nestjs/common'; import { Endpoint } from '@/server/api/endpoint-base.js'; -import type { NotesRepository } from '@/models/_.js'; -import type { MiNote } from '@/models/Note.js'; -import { 
safeForSql } from '@/misc/safe-for-sql.js'; import { normalizeForSearch } from '@/misc/normalize-for-search.js'; import { MetaService } from '@/core/MetaService.js'; import { DI } from '@/di-symbols.js'; - -/* -トレンドに載るためには「『直近a分間のユニーク投稿数が今からa分前~今からb分前の間のユニーク投稿数のn倍以上』のハッシュタグの上位5位以内に入る」ことが必要 -ユニーク投稿数とはそのハッシュタグと投稿ユーザーのペアのカウントで、例えば同じユーザーが複数回同じハッシュタグを投稿してもそのハッシュタグのユニーク投稿数は1とカウントされる - -..が理想だけどPostgreSQLでどうするのか分からないので単に「直近Aの内に投稿されたユニーク投稿数が多いハッシュタグ」で妥協する -*/ - -const rangeA = 1000 * 60 * 60; // 60分 -//const rangeB = 1000 * 60 * 120; // 2時間 -//const coefficient = 1.25; // 「n倍」の部分 -//const requiredUsers = 3; // 最低何人がそのタグを投稿している必要があるか - -const max = 5; +import { FeaturedService } from '@/core/FeaturedService.js'; +import { HashtagService } from '@/core/HashtagService.js'; export const meta = { tags: ['hashtags'], @@ -71,98 +55,22 @@ export const paramDef = { @Injectable() export default class extends Endpoint { // eslint-disable-line import/no-default-export constructor( - @Inject(DI.notesRepository) - private notesRepository: NotesRepository, - private metaService: MetaService, + private featuredService: FeaturedService, + private hashtagService: HashtagService, ) { super(meta, paramDef, async () => { const instance = await this.metaService.fetch(true); const hiddenTags = instance.hiddenTags.map(t => normalizeForSearch(t)); - const now = new Date(); // 5分単位で丸めた現在日時 - now.setMinutes(Math.round(now.getMinutes() / 5) * 5, 0, 0); + const ranking = await this.featuredService.getHashtagsRanking(10); - const tagNotes = await this.notesRepository.createQueryBuilder('note') - .where('note.createdAt > :date', { date: new Date(now.getTime() - rangeA) }) - .andWhere(new Brackets(qb => { qb - .where('note.visibility = \'public\'') - .orWhere('note.visibility = \'home\''); - })) - .andWhere('note.tags != \'{}\'') - .select(['note.tags', 'note.userId']) - .cache(60000) // 1 min - .getMany(); + const charts = ranking.length === 0 ? 
{} : await this.hashtagService.getCharts(ranking, 20); - if (tagNotes.length === 0) { - return []; - } - - const tags: { - name: string; - users: MiNote['userId'][]; - }[] = []; - - for (const note of tagNotes) { - for (const tag of note.tags) { - if (hiddenTags.includes(tag)) continue; - - const x = tags.find(x => x.name === tag); - if (x) { - if (!x.users.includes(note.userId)) { - x.users.push(note.userId); - } - } else { - tags.push({ - name: tag, - users: [note.userId], - }); - } - } - } - - // タグを人気順に並べ替え - const hots = tags - .sort((a, b) => b.users.length - a.users.length) - .map(tag => tag.name) - .slice(0, max); - - //#region 2(または3)で話題と判定されたタグそれぞれについて過去の投稿数グラフを取得する - const countPromises: Promise[] = []; - - const range = 20; - - // 10分 - const interval = 1000 * 60 * 10; - - for (let i = 0; i < range; i++) { - countPromises.push(Promise.all(hots.map(tag => this.notesRepository.createQueryBuilder('note') - .select('count(distinct note.userId)') - .where(`'{"${safeForSql(tag) ? tag : 'aichan_kawaii'}"}' <@ note.tags`) - .andWhere('note.createdAt < :lt', { lt: new Date(now.getTime() - (interval * i)) }) - .andWhere('note.createdAt > :gt', { gt: new Date(now.getTime() - (interval * (i + 1))) }) - .cache(60000) // 1 min - .getRawOne() - .then(x => parseInt(x.count, 10)), - ))); - } - - const countsLog = await Promise.all(countPromises); - //#endregion - - const totalCounts = await Promise.all(hots.map(tag => this.notesRepository.createQueryBuilder('note') - .select('count(distinct note.userId)') - .where(`'{"${safeForSql(tag) ? 
tag : 'aichan_kawaii'}"}' <@ note.tags`) - .andWhere('note.createdAt > :gt', { gt: new Date(now.getTime() - rangeA) }) - .cache(60000 * 60) // 60 min - .getRawOne() - .then(x => parseInt(x.count, 10)), - )); - - const stats = hots.map((tag, i) => ({ + const stats = ranking.map((tag, i) => ({ tag, - chart: countsLog.map(counts => counts[i]), - usersCount: totalCounts[i], + chart: charts[tag], + usersCount: Math.max(...charts[tag]), })); return stats;