instrumental/lib/mastodon/cli/search.rb

# frozen_string_literal: true

require_relative 'base'

module Mastodon::CLI
  # CLI commands for managing the search indices. The only command defined
  # here is `deploy`, which creates or upgrades the indices and then imports
  # and/or cleans their contents.
  class Search < Base
    # Indices are sorted by amount of data to be expected in each, so that
    # smaller indices can go online sooner
    INDICES = [
      InstancesIndex,
      AccountsIndex,
      TagsIndex,
      PublicStatusesIndex,
      StatusesIndex,
    ].freeze

    option :concurrency, type: :numeric, default: 5, aliases: [:c], desc: 'Workload will be split between this number of threads'
    option :batch_size, type: :numeric, default: 100, aliases: [:b], desc: 'Number of records in each batch'
    option :only, type: :array, enum: %w(instances accounts tags statuses public_statuses), desc: 'Only process these indices'
    option :import, type: :boolean, default: true, desc: 'Import data from the database to the index'
    option :clean, type: :boolean, default: true, desc: 'Remove outdated documents from the index'
    option :reset_chewy, type: :boolean, default: false, desc: "Reset Chewy's internal index"
    desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'
    long_desc <<~LONG_DESC
      If Elasticsearch is empty, this command will create the necessary indices
      and then import data from the database into those indices.

      This command will also upgrade indices if the underlying schema has been
      changed since the last run. Index upgrades erase index data.

      Even if creating or upgrading indices is not necessary, data from the
      database will be imported into the indices, unless overridden with --no-import.
    LONG_DESC
    # Runs the deployment in this order:
    #
    # 1. validate the numeric options,
    # 2. purge and lock every selected index whose specification changed,
    # 3. estimate the workload to size the progress bar,
    # 4. for each index, import (unless --no-import) and clean up
    #    (unless --no-clean), reporting progress as batches complete.
    #
    # Search server errors are rescued and reported through fail_with_message.
    def deploy
      verify_deploy_options!

      # With --only, index classes are resolved from the given names
      # (e.g. "public_statuses" -> PublicStatusesIndex), in the order given.
      indices = if options[:only]
                  options[:only].map { |str| "#{str.camelize}Index".constantize }
                else
                  INDICES
                end

      # A single thread pool is shared by all importers.
      pool      = Concurrent::FixedThreadPool.new(options[:concurrency], max_queue: options[:concurrency] * 10)
      importers = indices.index_with { |index| "Importer::#{index.name}Importer".constantize.new(batch_size: options[:batch_size], executor: pool) }
      progress  = ProgressBar.create(
        {
          total: nil,
          format: '%t%c/%u |%b%i| %e (%r docs/s)',
          autofinish: false,
        }.merge(progress_output_options)
      )

      Chewy::Stash::Specification.reset! if options[:reset_chewy]

      # First, ensure all indices are created and have the correct
      # structure, so that live data can already be written
      indices.select { |index| index.specification.changed? }.each do |index|
        progress.title = "Upgrading #{index} "
        index.purge
        index.specification.lock!
      end

      progress.title = 'Estimating workload '
      progress.total = indices.sum { |index| importers[index].estimate! }

      reset_connection_pools!

      # Running totals reported in the final summary line.
      added   = 0
      removed = 0

      indices.each do |index|
        importer = importers[index]
        importer.optimize_for_import!

        importer.on_progress do |(indexed, deleted)|
          # The total is only an estimate. When the real count would exceed
          # it, the total is cleared so the bar no longer has an upper bound.
          # After that, `progress.total` is nil and `Integer > nil` would
          # raise ArgumentError, so only compare while a total is still set.
          progress.total = nil if progress.total && progress.progress + indexed + deleted > progress.total
          progress.progress += indexed + deleted
          added   += indexed
          removed += deleted
        end

        importer.on_failure do |reason|
          progress.log(pastel.red("Error while importing #{index}: #{reason}"))
        end

        if options[:import]
          progress.title = "Importing #{index} "
          importer.import!
        end

        if options[:clean]
          progress.title = "Cleaning #{index} "
          importer.clean_up!
        end
      ensure
        # Runs whether or not the import/clean-up above raised.
        importer.optimize_for_search!
      end

      progress.title = 'Done! '
      progress.finish

      say("Indexed #{added} records, de-indexed #{removed}", :green, true)
    rescue Elasticsearch::Transport::Transport::ServerError => e
      fail_with_message <<~ERROR
        There was an issue connecting to the search server. Make sure the
        server is configured and running correctly, and that the environment
        variable settings match what the server is expecting.

        #{e.message}
      ERROR
    end

    private

    # Validates the numeric options of `deploy` before any work starts.
    def verify_deploy_options!
      verify_deploy_concurrency!
      verify_deploy_batch_size!
    end

    # Fails with a message when --concurrency is below 1.
    def verify_deploy_concurrency!
      fail_with_message 'Cannot run with this concurrency setting, must be at least 1' if options[:concurrency] < 1
    end

    # Fails with a message when --batch-size is below 1.
    def verify_deploy_batch_size!
      fail_with_message 'Cannot run with this batch_size setting, must be at least 1' if options[:batch_size] < 1
    end

    # Extra ProgressBar.create options: in the test environment the bar's
    # output is sent to a null output; otherwise nothing is overridden.
    def progress_output_options
      Rails.env.test? ? { output: ProgressBar::Outputs::Null } : {}
    end
  end
end
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00			`# frozen_string_literal: true`

Add CLI Base class for command line code (#25106) 2023-05-24 18:55:40 +09:00			`require_relative 'base'`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00
Move the mastodon/_cli files to mastodon/cli/ (#24139) 2023-05-23 23:08:26 +09:00			`module Mastodon::CLI`
Add CLI Base class for command line code (#25106) 2023-05-24 18:55:40 +09:00			`class Search < Base`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00			`# Indices are sorted by amount of data to be expected in each, so that`
			`# smaller indices can go online sooner`
			`INDICES = [`
Change interaction modal in web UI (#26075) Co-authored-by: Eugen Rochko <eugen@zeonfederated.com> 2023-07-27 23:11:17 +09:00			`InstancesIndex,`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00			`AccountsIndex,`
			`TagsIndex,`
Add new public status index (#26344) Co-authored-by: Eugen Rochko <eugen@zeonfederated.com> Co-authored-by: Claire <claire.github-309c@sitedethib.com> 2023-08-24 23:40:04 +09:00			`PublicStatusesIndex,`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00			`StatusesIndex,`
			`].freeze`

Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`option :concurrency, type: :numeric, default: 5, aliases: [:c], desc: 'Workload will be split between this number of threads'`
			`option :batch_size, type: :numeric, default: 100, aliases: [:b], desc: 'Number of records in each batch'`
Add PublicStatuses to tootctl search deploy --only option (#26896) 2023-09-12 16:59:59 +09:00			`option :only, type: :array, enum: %w(instances accounts tags statuses public_statuses), desc: 'Only process these indices'`
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`option :import, type: :boolean, default: true, desc: 'Import data from the database to the index'`
			`option :clean, type: :boolean, default: true, desc: 'Remove outdated documents from the index'`
Fix ES_PRESET not being applied to Chewy's internal index (#26489) 2023-08-15 02:00:56 +09:00			`option :reset_chewy, type: :boolean, default: false, desc: "Reset Chewy's internal index"`
Fix ElasticSearch to Elasticsearch (#17050) 2021-11-26 16:30:02 +09:00			`desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00			`long_desc <<~LONG_DESC`
Fix ElasticSearch to Elasticsearch (#17050) 2021-11-26 16:30:02 +09:00			`If Elasticsearch is empty, this command will create the necessary indices`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00			`and then import data from the database into those indices.`

			`This command will also upgrade indices if the underlying schema has been`
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`changed since the last run. Index upgrades erase index data.`
Add parallelization to `tootctl search deploy` (#12051) * Add parallel gem * Modify parallel option in tootctl search deploy * Add paralell option to tootctl search deploy * Change 1 to false * Clean up * Rename --parallel to --processes 2019-10-03 04:50:43 +09:00
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00			`Even if creating or upgrading indices is not necessary, data from the`
Fix typos (#18604) * Fix typos Found via `codespell -q 3 -S ./CHANGELOG.md,./AUTHORS.md,./config/locales,./app/javascript/mastodon/locales -L ba,keypair,medias,pixelx,ro` * Follow-up typo fix 2022-08-29 00:44:34 +09:00			`database will be imported into the indices, unless overridden with --no-import.`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00			`LONG_DESC`
			`def deploy`
Extract verify options method in search cli (#25121) 2023-06-01 21:35:05 +09:00			`verify_deploy_options!`
Add batch_size option to bin/tootctl search deploy (#17049) 2021-11-26 16:29:53 +09:00
Autofix Rubocop Style/RedundantBegin (#23703) 2023-02-19 07:09:40 +09:00			`indices = if options[:only]`
			`options[:only].map { \|str\| "#{str.camelize}Index".constantize }`
			`else`
			`INDICES`
			`end`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`pool = Concurrent::FixedThreadPool.new(options[:concurrency], max_queue: options[:concurrency] * 10)`
			`importers = indices.index_with { \|index\| "Importer::#{index.name}Importer".constantize.new(batch_size: options[:batch_size], executor: pool) }`
Remove the `stub_stdout` wrapper around CLI specs (#28340) 2023-12-13 19:14:19 +09:00			`progress = ProgressBar.create(`
			`{`
			`total: nil,`
			`format: '%t%c/%u \|%b%i\| %e (%r docs/s)',`
			`autofinish: false,`
			`}.merge(progress_output_options)`
			`)`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Fix ES_PRESET not being applied to Chewy's internal index (#26489) 2023-08-15 02:00:56 +09:00			`Chewy::Stash::Specification.reset! if options[:reset_chewy]`

Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00			`# First, ensure all indices are created and have the correct`
			`# structure, so that live data can already be written`
			`indices.select { \|index\| index.specification.changed? }.each do \|index\|`
			`progress.title = "Upgrading #{index} "`
			`index.purge`
			`index.specification.lock!`
			`end`

Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`progress.title = 'Estimating workload '`
			`progress.total = indices.sum { \|index\| importers[index].estimate! }`

Fix opening and closing Redis connections instead of using a pool (#18171) * Fix opening and closing Redis connections instead of using a pool * Fix Redis connections not being returned to the pool in CLI commands 2022-04-30 05:43:07 +09:00			`reset_connection_pools!`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`added = 0`
			`removed = 0`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`indices.each do \|index\|`
			`importer = importers[index]`
			`importer.optimize_for_import!`

			`importer.on_progress do \|(indexed, deleted)\|`
			`progress.total = nil if progress.progress + indexed + deleted > progress.total`
			`progress.progress += indexed + deleted`
			`added += indexed`
			`removed += deleted`
			`end`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`importer.on_failure do \|reason\|`
			`progress.log(pastel.red("Error while importing #{index}: #{reason}"))`
			`end`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`if options[:import]`
			`progress.title = "Importing #{index} "`
			`importer.import!`
			`end`

			`if options[:clean]`
			`progress.title = "Cleaning #{index} "`
			`importer.clean_up!`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00			`end`
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`ensure`
			`importer.optimize_for_search!`
Add parallelization to `tootctl search deploy` (#12051) * Add parallel gem * Modify parallel option in tootctl search deploy * Add paralell option to tootctl search deploy * Change 1 to false * Clean up * Rename --parallel to --processes 2019-10-03 04:50:43 +09:00			`end`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`progress.title = 'Done! '`
			`progress.finish`
Change `tootctl search deploy` algorithm (#14300) 2020-07-15 01:10:35 +09:00
Change algorithm of `tootctl search deploy` to improve performance (#18463) 2022-05-23 05:16:43 +09:00			`say("Indexed #{added} records, de-indexed #{removed}", :green, true)`
Add diagnostic message for failure during CLI search deploy (#29462) 2024-03-15 23:26:23 +09:00			`rescue Elasticsearch::Transport::Transport::ServerError => e`
			`fail_with_message <<~ERROR`
			`There was an issue connecting to the search server. Make sure the`
			`server is configured and running correctly, and that the environment`
			`variable settings match what the server is expecting.`

			`#{e.message}`
			`ERROR`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00			`end`
Extract verify options method in search cli (#25121) 2023-06-01 21:35:05 +09:00
			`private`

			`def verify_deploy_options!`
			`verify_deploy_concurrency!`
			`verify_deploy_batch_size!`
			`end`

			`def verify_deploy_concurrency!`
Handle CLI failure exit status at the top-level script (#28322) 2024-01-26 17:53:44 +09:00			`fail_with_message 'Cannot run with this concurrency setting, must be at least 1' if options[:concurrency] < 1`
Extract verify options method in search cli (#25121) 2023-06-01 21:35:05 +09:00			`end`

			`def verify_deploy_batch_size!`
Handle CLI failure exit status at the top-level script (#28322) 2024-01-26 17:53:44 +09:00			`fail_with_message 'Cannot run with this batch_size setting, must be at least 1' if options[:batch_size] < 1`
Extract verify options method in search cli (#25121) 2023-06-01 21:35:05 +09:00			`end`
Remove the `stub_stdout` wrapper around CLI specs (#28340) 2023-12-13 19:14:19 +09:00
			`def progress_output_options`
			`Rails.env.test? ? { output: ProgressBar::Outputs::Null } : {}`
			`end`
Add `tootctl search deploy` to avoid ugly rake task syntax (#10403) 2019-03-28 10:16:11 +09:00			`end`
			`end`