feat(backend): elasticsearchで検索できるように (MisskeyIO#661)

Co-authored-by: 皐月なふ (Nafu Satsuki) <satsuki@nafusoft.dev>
This commit is contained in:
まっちゃてぃー。 2024-07-14 07:18:50 +09:00 committed by GitHub
parent 0375599e50
commit a77291be57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 235 additions and 7 deletions

View file

@ -72,6 +72,7 @@
"@bull-board/fastify": "5.18.1",
"@bull-board/ui": "5.18.1",
"@discordapp/twemoji": "15.0.3",
"@elastic/elasticsearch": "^8.14.0",
"@fastify/accepts": "4.3.0",
"@fastify/cookie": "9.3.1",
"@fastify/cors": "9.0.1",

View file

@ -8,6 +8,7 @@ import { Global, Inject, Module } from '@nestjs/common';
import * as Redis from 'ioredis';
import { DataSource } from 'typeorm';
import { MeiliSearch } from 'meilisearch';
import { Client as ElasticSearch } from '@elastic/elasticsearch';
import { DI } from './di-symbols.js';
import { Config, loadConfig } from './config.js';
import { createPostgresDataSource } from './postgres.js';
@ -44,6 +45,30 @@ const $meilisearch: Provider = {
inject: [DI.config],
};
/**
 * DI provider for the Elasticsearch client.
 *
 * Resolves to a configured client when `config.elasticsearch` is present and
 * to `null` otherwise, so consumers must handle the "not configured" case.
 */
const $elasticsearch: Provider = {
	provide: DI.elasticsearch,
	inject: [DI.config],
	useFactory: (config: Config) => {
		const settings = config.elasticsearch;
		// No Elasticsearch section in the config: expose null instead of a client.
		if (!settings) return null;

		const scheme = settings.ssl ? 'https' : 'http';
		// Credentials are only sent when both user and pass are set.
		const credentials = (settings.user && settings.pass)
			? { username: settings.user, password: settings.pass }
			: undefined;

		return new ElasticSearch({
			nodes: {
				url: new URL(`${scheme}://${settings.host}:${settings.port}`),
				ssl: {
					rejectUnauthorized: settings.rejectUnauthorized,
				},
			},
			auth: credentials,
			pingTimeout: 30000,
		});
	},
};
const $redis: Provider = {
provide: DI.redis,
useFactory: (config: Config) => {
@ -160,8 +185,8 @@ const $redisForTimelines: Provider = {
@Global()
@Module({
imports: [RepositoryModule],
providers: [$config, $db, $meilisearch, $redis, $redisForPub, $redisForSub, $redisForTimelines],
exports: [$config, $db, $meilisearch, $redis, $redisForPub, $redisForSub, $redisForTimelines, RepositoryModule],
providers: [$config, $db, $meilisearch, $elasticsearch, $redis, $redisForPub, $redisForSub, $redisForTimelines],
exports: [$config, $db, $meilisearch, $elasticsearch, $redis, $redisForPub, $redisForSub, $redisForTimelines, RepositoryModule],
})
export class GlobalModule implements OnApplicationShutdown {
constructor(

View file

@ -66,6 +66,16 @@ type Source = {
scope?: 'local' | 'global' | string[];
};
elasticsearch?: {
host: string;
port: string;
user: string;
pass: string;
ssl?: boolean;
rejectUnauthorized?: boolean;
index: string;
};
skebStatus?: {
method: string;
endpoint: string;
@ -149,6 +159,15 @@ export type Config = {
index: string;
scope?: 'local' | 'global' | string[];
} | undefined;
elasticsearch: {
host: string;
port: string;
user: string;
pass: string;
ssl?: boolean;
rejectUnauthorized?: boolean;
index: string;
} | undefined;
skebStatus: {
method: string;
endpoint: string;
@ -272,6 +291,7 @@ export function loadConfig(): Config {
dbReplications: config.dbReplications,
dbSlaves: config.dbSlaves,
meilisearch: config.meilisearch,
elasticsearch: config.elasticsearch,
redis,
redisForPubsub: config.redisForPubsub ? convertRedisOptions(config.redisForPubsub, host) : redis,
redisForSystemQueue: config.redisForSystemQueue ? convertRedisOptions(config.redisForSystemQueue, host) : redisForJobQueue,

View file

@ -8,6 +8,7 @@ import { In } from 'typeorm';
import { DI } from '@/di-symbols.js';
import type { Config } from '@/config.js';
import { bindThis } from '@/decorators.js';
import { LoggerService } from '@/core/LoggerService.js';
import { MiNote } from '@/models/Note.js';
import { MiUser } from '@/models/_.js';
import type { NotesRepository } from '@/models/_.js';
@ -16,7 +17,9 @@ import { isUserRelated } from '@/misc/is-user-related.js';
import { CacheService } from '@/core/CacheService.js';
import { QueryService } from '@/core/QueryService.js';
import { IdService } from '@/core/IdService.js';
import type Logger from '@/logger.js';
import type { Index, MeiliSearch } from 'meilisearch';
import type { Client as ElasticSearch } from '@elastic/elasticsearch';
type K = string;
type V = string | number | boolean;
@ -65,6 +68,8 @@ function compileQuery(q: Q): string {
export class SearchService {
private readonly meilisearchIndexScope: 'local' | 'global' | string[] = 'local';
private meilisearchNoteIndex: Index | null = null;
private elasticsearchNoteIndex: string | null = null;
private logger: Logger;
constructor(
@Inject(DI.config)
@ -73,15 +78,24 @@ export class SearchService {
@Inject(DI.meilisearch)
private meilisearch: MeiliSearch | null,
@Inject(DI.elasticsearch)
private elasticsearch: ElasticSearch | null,
@Inject(DI.notesRepository)
private notesRepository: NotesRepository,
private cacheService: CacheService,
private queryService: QueryService,
private idService: IdService,
private loggerService: LoggerService,
) {
this.logger = this.loggerService.getLogger('note:search');
if (meilisearch) {
this.meilisearchNoteIndex = meilisearch.index(`${config.meilisearch!.index}---notes`);
if (config.meilisearch?.scope) {
this.meilisearchIndexScope = config.meilisearch.scope;
}
/*this.meilisearchNoteIndex.updateSettings({
searchableAttributes: [
'text',
@ -104,10 +118,52 @@ export class SearchService {
maxTotalHits: 10000,
},
});*/
}
if (config.meilisearch?.scope) {
this.meilisearchIndexScope = config.meilisearch.scope;
} else if (this.elasticsearch) {
this.elasticsearchNoteIndex = `${config.elasticsearch!.index}---notes`;
// Notes are written to per-month indices named `<base>-YYYYMM`. The existence
// check has to target that same monthly name: checking the bare base name (as
// was done before) never reflects the index that is actually created below,
// so a create request was issued even when the monthly index already existed.
const currentMonthNoteIndex = `${this.elasticsearchNoteIndex}-${new Date().toISOString().slice(0, 7).replace(/-/g, '')}`;
this.elasticsearch.indices.exists({
	index: currentMonthNoteIndex,
}).then((indexExists) => {
	if (indexExists) return;
	// Fire-and-forget creation; failures are logged and do not block construction.
	this.elasticsearch?.indices.create(
		{
			index: currentMonthNoteIndex,
			mappings: {
				// NOTE(review): `kuromoji_analyzer` is declared in the settings below but
				// no field here references it via `analyzer` - confirm whether `text`
				// and `cw` are meant to use it.
				properties: {
					text: { type: 'text' },
					cw: { type: 'text' },
					createdAt: { type: 'long' },
					userId: { type: 'keyword' },
					userHost: { type: 'keyword' },
					channelId: { type: 'keyword' },
					tags: { type: 'keyword' },
				},
			},
			settings: {
				index: {
					analysis: {
						tokenizer: {
							kuromoji: {
								type: 'kuromoji_tokenizer',
								mode: 'search',
							},
						},
						analyzer: {
							kuromoji_analyzer: {
								type: 'custom',
								tokenizer: 'kuromoji',
							},
						},
					},
				},
			},
		},
	).catch((error) => {
		this.logger.error(error);
	});
}).catch((error) => {
	this.logger.error('Error while checking if index exists', error);
});
}
}
@ -144,6 +200,23 @@ export class SearchService {
}], {
primaryKey: 'id',
});
} else if (this.elasticsearch) {
// Document stored for this note. `createdAt` is the epoch-millisecond timestamp
// obtained by parsing the note id, which the search path uses for range filters
// and sorting.
const body = {
	createdAt: this.idService.parse(note.id).date.getTime(),
	userId: note.userId,
	userHost: note.userHost,
	channelId: note.channelId,
	cw: note.cw,
	text: note.text,
	tags: note.tags,
};
await this.elasticsearch.index({
	// Written to the index for the current calendar month (`<base>-YYYYMM`).
	index: `${this.elasticsearchNoteIndex}-${new Date().toISOString().slice(0, 7).replace(/-/g, '')}`,
	id: note.id,
	body: body,
}).catch((error) => {
	// Best-effort indexing: report through the service logger (as the index
	// creation path does) rather than the console, and do not rethrow.
	this.logger.error(error);
});
}
}
@ -204,6 +277,67 @@ export class SearchService {
if (me && isUserRelated(note, userIdsWhoMeMuting)) return false;
return true;
});
return notes.sort((a, b) => a.id > b.id ? -1 : 1);
} else if (this.elasticsearch) {
// Elasticsearch-backed search: build a bool query from the pagination cursor,
// the option filters and the query string, fetch the matching document ids,
// load those notes from the database, drop notes related to users in the
// caller's mute/block sets, and return them ordered by id descending.
const esFilter: any = {
bool: {
must: [],
},
};
// Pagination ids are converted to timestamps and applied as exclusive bounds on `createdAt`.
if (pagination.untilId) esFilter.bool.must.push({ range: { createdAt: { lt: this.idService.parse(pagination.untilId).date.getTime() } } });
if (pagination.sinceId) esFilter.bool.must.push({ range: { createdAt: { gt: this.idService.parse(pagination.sinceId).date.getTime() } } });
if (opts.userId) esFilter.bool.must.push({ term: { userId: opts.userId } });
if (opts.channelId) esFilter.bool.must.push({ term: { channelId: opts.channelId } });
if (opts.host) {
if (opts.host === '.') {
// '.' selects documents that have no `userHost` field.
// NOTE(review): presumably these are local notes - confirm.
esFilter.bool.must.push({ bool: { must_not: [{ exists: { field: 'userHost' } }] } });
} else {
esFilter.bool.must.push({ term: { userHost: opts.host } });
}
}
if (q !== '') {
// At least one of the `text` / `cw` clauses has to match the query string.
esFilter.bool.must.push({
bool: {
should: [
{ wildcard: { 'text': { value: q } } },
{ simple_query_string: { fields: ['text'], 'query': q, default_operator: 'and' } },
{ wildcard: { 'cw': { value: q } } },
{ simple_query_string: { fields: ['cw'], 'query': q, default_operator: 'and' } },
],
minimum_should_match: 1,
},
});
}
// The trailing '*' makes the request span every index whose name starts with the note index base name.
const res = await (this.elasticsearch.search)({
index: this.elasticsearchNoteIndex + '*' as string,
body: {
query: esFilter,
sort: [{ createdAt: { order: 'desc' } }],
},
_source: ['id', 'createdAt'],
size: pagination.limit,
});
// Only the document ids of the hits are used; the notes themselves are loaded from the database.
const noteIds = res.hits.hits.map((hit: any) => hit._id);
if (noteIds.length === 0) return [];
// With a caller (`me`), fetch two user-id sets from the cache service; without one, use empty sets.
const [
userIdsWhoMeMuting,
userIdsWhoBlockingMe,
] = me ? await Promise.all([
this.cacheService.userMutingsCache.fetch(me.id),
this.cacheService.userBlockedCache.fetch(me.id),
]) : [new Set<string>(), new Set<string>()];
const notes = (await this.notesRepository.findBy({
id: In(noteIds),
})).filter(note => {
// Exclude notes related to users in either set when a caller is present.
if (me && isUserRelated(note, userIdsWhoBlockingMe)) return false;
if (me && isUserRelated(note, userIdsWhoMeMuting)) return false;
return true;
});
// Order by id, largest first.
return notes.sort((a, b) => a.id > b.id ? -1 : 1);
} else {
const query = this.queryService.makePaginationQuery(this.notesRepository.createQueryBuilder('note'), pagination.sinceId, pagination.untilId);

View file

@ -7,6 +7,7 @@ export const DI = {
config: Symbol('config'),
db: Symbol('db'),
meilisearch: Symbol('meilisearch'),
elasticsearch: Symbol('elasticsearch'),
redis: Symbol('redis'),
redisForPub: Symbol('redisForPub'),
redisForSub: Symbol('redisForSub'),