Browse Source

remove chewy

reactions
Lerk 5 months ago
parent
commit
76ebf75fa7
  1. 1
      .dockerignore
  2. 5
      .env.nanobox
  3. 9
      .env.production.sample
  4. 3
      .gitignore
  5. 1
      Gemfile
  6. 35
      Gemfile.lock
  7. 43
      app/chewy/accounts_index.rb
  8. 66
      app/chewy/statuses_index.rb
  9. 37
      app/chewy/tags_index.rb
  10. 2
      app/models/account.rb
  11. 2
      app/models/account_stat.rb
  12. 6
      app/models/application_record.rb
  13. 2
      app/models/bookmark.rb
  14. 2
      app/models/favourite.rb
  15. 2
      app/models/status.rb
  16. 2
      app/models/tag.rb
  17. 33
      app/services/account_search_service.rb
  18. 4
      app/services/batched_remove_status_service.rb
  19. 2
      app/services/delete_account_service.rb
  20. 26
      app/services/search_service.rb
  21. 69
      app/services/tag_search_service.rb
  22. 34
      boxfile.yml
  23. 3
      chart/Chart.lock
  24. 4
      chart/Chart.yaml
  25. 3
      chart/templates/_helpers.tpl
  26. 5
      chart/templates/configmap-env.yaml
  27. 79
      chart/templates/job-chewy-upgrade.yaml
  28. 13
      chart/values.yaml
  29. 1
      config/application.rb
  30. 39
      config/initializers/chewy.rb
  31. 19
      docker-compose.yml
  32. 11
      lib/chewy/strategy/custom_sidekiq.rb
  33. 1
      lib/mastodon/cli_helper.rb
  34. 156
      lib/mastodon/search_cli.rb
  35. 2
      spec/rails_helper.rb
  36. 1
      spec/spec_helper.rb

1
.dockerignore

@ -17,5 +17,4 @@ vendor/bundle
postgres
postgres14
redis
elasticsearch
chart

5
.env.nanobox

@ -13,11 +13,6 @@ DB_PORT=5432
# DATABASE_URL=postgresql://$DATA_DB_USER:$DATA_DB_PASS@$DATA_DB_HOST/gonano
# Optional Elasticsearch configuration
ES_ENABLED=true
ES_HOST=$DATA_ELASTIC_HOST
ES_PORT=9200
BIND=0.0.0.0
# Federation

9
.env.production.sample

@ -29,15 +29,6 @@ DB_NAME=mastodon_production
DB_PASS=
DB_PORT=5432
# Elasticsearch (optional)
# ------------------------
ES_ENABLED=true
ES_HOST=localhost
ES_PORT=9200
# Authentication for ES (optional)
ES_USER=elastic
ES_PASS=password
# Secrets
# -------
# Make sure to use `rake secret` to generate secrets

3
.gitignore

@ -38,11 +38,10 @@
.vscode/
.idea/
# Ignore postgres + redis + elasticsearch volume optionally created by docker-compose
# Ignore postgres + redis volumes optionally created by docker-compose
/postgres
/postgres14
/redis
/elasticsearch
# ignore Helm dependency charts
/chart/charts/*.tgz

1
Gemfile

@ -30,7 +30,6 @@ gem 'bootsnap', '~> 1.9.2', require: false
gem 'browser'
gem 'charlock_holmes', '~> 0.7.7'
gem 'iso-639'
gem 'chewy', '~> 7.2'
gem 'cld3', '~> 3.4.3'
gem 'devise', '~> 4.8'
gem 'devise-two-factor', '~> 4.0'

35
Gemfile.lock

@ -147,10 +147,6 @@ GEM
activesupport
cbor (0.5.9.6)
charlock_holmes (0.7.7)
chewy (7.2.3)
activesupport (>= 5.2)
elasticsearch (>= 7.12.0, < 7.14.0)
elasticsearch-dsl
chunky_png (1.4.0)
cld3 (3.4.3)
ffi (>= 1.1.0, < 1.16.0)
@ -197,15 +193,6 @@ GEM
railties (>= 3.2)
e2mmap (0.1.0)
ed25519 (1.2.4)
elasticsearch (7.13.3)
elasticsearch-api (= 7.13.3)
elasticsearch-transport (= 7.13.3)
elasticsearch-api (7.13.3)
multi_json
elasticsearch-dsl (0.1.10)
elasticsearch-transport (7.13.3)
faraday (~> 1)
multi_json
encryptor (3.0.0)
erubi (1.10.0)
et-orbi (1.2.6)
@ -214,25 +201,6 @@ GEM
fabrication (2.23.1)
faker (2.19.0)
i18n (>= 1.6, < 2)
faraday (1.8.0)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
faraday-rack (~> 1.0)
multipart-post (>= 1.2, < 3)
ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
faraday-rack (1.0.0)
fast_blank (1.0.1)
fastimage (2.2.6)
ffi (1.15.4)
@ -380,7 +348,6 @@ GEM
minitest (5.15.0)
msgpack (1.4.2)
multi_json (1.15.0)
multipart-post (2.1.1)
net-ldap (0.17.0)
net-scp (3.0.0)
net-ssh (>= 2.6.5, < 7.0.0)
@ -562,7 +529,6 @@ GEM
ruby-saml (1.13.0)
nokogiri (>= 1.10.5)
rexml
ruby2_keywords (0.0.5)
rufus-scheduler (3.8.0)
fugit (~> 1.1, >= 1.1.6)
safety_net_attestation (0.4.0)
@ -710,7 +676,6 @@ DEPENDENCIES
capistrano-yarn (~> 2.0)
capybara (~> 3.36)
charlock_holmes (~> 0.7.7)
chewy (~> 7.2)
cld3 (~> 3.4.3)
climate_control (~> 0.2)
color_diff (~> 0.1)

43
app/chewy/accounts_index.rb

@ -1,43 +0,0 @@
# frozen_string_literal: true
class AccountsIndex < Chewy::Index
settings index: { refresh_interval: '5m' }, analysis: {
analyzer: {
content: {
tokenizer: 'whitespace',
filter: %w(lowercase asciifolding cjk_width),
},
edge_ngram: {
tokenizer: 'edge_ngram',
filter: %w(lowercase asciifolding cjk_width),
},
},
tokenizer: {
edge_ngram: {
type: 'edge_ngram',
min_gram: 1,
max_gram: 15,
},
},
}
index_scope ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? }
root date_detection: false do
field :id, type: 'long'
field :display_name, type: 'text', analyzer: 'content' do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
field :following_count, type: 'long', value: ->(account) { account.following.local.count }
field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
end
end

66
app/chewy/statuses_index.rb

@ -1,66 +0,0 @@
# frozen_string_literal: true
class StatusesIndex < Chewy::Index
settings index: { refresh_interval: '15m' }, analysis: {
filter: {
english_stop: {
type: 'stop',
stopwords: '_english_',
},
english_stemmer: {
type: 'stemmer',
language: 'english',
},
english_possessive_stemmer: {
type: 'stemmer',
language: 'possessive_english',
},
},
analyzer: {
content: {
tokenizer: 'uax_url_email',
filter: %w(
english_possessive_stemmer
lowercase
asciifolding
cjk_width
english_stop
english_stemmer
),
},
},
}
index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll)
crutch :mentions do |collection|
data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :favourites do |collection|
data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :reblogs do |collection|
data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :bookmarks do |collection|
data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
root date_detection: false do
field :id, type: 'long'
field :account_id, type: 'long'
field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
field :stemmed, type: 'text', analyzer: 'content'
end
field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
end
end

37
app/chewy/tags_index.rb

@ -1,37 +0,0 @@
# frozen_string_literal: true
class TagsIndex < Chewy::Index
settings index: { refresh_interval: '15m' }, analysis: {
analyzer: {
content: {
tokenizer: 'keyword',
filter: %w(lowercase asciifolding cjk_width),
},
edge_ngram: {
tokenizer: 'edge_ngram',
filter: %w(lowercase asciifolding cjk_width),
},
},
tokenizer: {
edge_ngram: {
type: 'edge_ngram',
min_gram: 2,
max_gram: 15,
},
},
}
index_scope ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? }
root date_detection: false do
field :name, type: 'text', analyzer: 'content' do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day.accounts } }
field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
end
end

2
app/models/account.rb

@ -145,8 +145,6 @@ class Account < ApplicationRecord
delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
update_index('accounts', :self)
def local?
domain.nil?
end

2
app/models/account_stat.rb

@ -18,6 +18,4 @@ class AccountStat < ApplicationRecord
self.ignored_columns = %w(lock_version)
belongs_to :account, inverse_of: :account_stat
update_index('accounts', :account)
end

6
app/models/application_record.rb

@ -5,12 +5,6 @@ class ApplicationRecord < ActiveRecord::Base
include Remotable
class << self
def update_index(_type_name, *_args, &_block)
super if Chewy.enabled?
end
end
def boolean_with_default(key, default_value)
value = attributes[key]

2
app/models/bookmark.rb

@ -13,8 +13,6 @@
class Bookmark < ApplicationRecord
include Paginable
update_index('statuses', :status) if Chewy.enabled?
belongs_to :account, inverse_of: :bookmarks
belongs_to :status, inverse_of: :bookmarks

2
app/models/favourite.rb

@ -13,8 +13,6 @@
class Favourite < ApplicationRecord
include Paginable
update_index('statuses', :status)
belongs_to :account, inverse_of: :favourites
belongs_to :status, inverse_of: :favourites

2
app/models/status.rb

@ -64,8 +64,6 @@ class Status < ApplicationRecord
# will be based on current time instead of `created_at`
attr_accessor :override_timestamps
update_index('statuses', :proper)
enum visibility: [:public, :unlisted, :private, :direct, :limited], _suffix: :visibility
belongs_to :application, class_name: 'Doorkeeper::Application', optional: true

2
app/models/tag.rb

@ -40,8 +40,6 @@ class Tag < ApplicationRecord
scope :recently_used, ->(account) { joins(:statuses).where(statuses: { id: account.statuses.select(:id).limit(1000) }).group(:id).order(Arel.sql('count(*) desc')) }
scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } # Search with case-sensitive to use B-tree index
update_index('tags', :self)
def to_param
name
end

33
app/services/account_search_service.rb

@ -46,8 +46,7 @@ class AccountSearchService < BaseService
return [] if limit_for_non_exact_results.zero?
@search_results ||= begin
results = from_elasticsearch if Chewy.enabled?
results ||= from_database
results = from_database
results
end
end
@ -68,36 +67,6 @@ class AccountSearchService < BaseService
Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
end
def from_elasticsearch
must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct.edge_ngram acct) : %w(acct.edge_ngram acct display_name.edge_ngram display_name), type: 'most_fields', operator: 'and' } }]
should_clauses = []
if account
return [] if options[:following] && following_ids.empty?
if options[:following]
must_clauses << { terms: { id: following_ids } }
elsif following_ids.any?
should_clauses << { terms: { id: following_ids, boost: 100 } }
end
end
query = { bool: { must: must_clauses, should: should_clauses } }
functions = [reputation_score_function, followers_score_function, time_distance_function]
records = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
.limit(limit_for_non_exact_results)
.offset(offset)
.objects
.compact
ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)
records
rescue Faraday::ConnectionFailed, Parslet::ParseFailed
nil
end
def reputation_score_function
{
script_score: {

4
app/services/batched_remove_status_service.rb

@ -29,10 +29,6 @@ class BatchedRemoveStatusService < BaseService
# cascade the delete faster without loading the associations.
statuses_and_reblogs.each_slice(50) { |slice| Status.where(id: slice.map(&:id)).delete_all }
# Since we skipped all callbacks, we also need to manually
# deindex the statuses
Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
return if options[:skip_side_effects]
# Batch by source account

2
app/services/delete_account_service.rb

@ -187,7 +187,6 @@ class DeleteAccountService < BaseService
@account.favourites.in_batches do |favourites|
ids = favourites.pluck(:status_id)
StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
favourites.delete_all
end
@ -195,7 +194,6 @@ class DeleteAccountService < BaseService
def purge_bookmarks!
@account.bookmarks.in_batches do |bookmarks|
Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
bookmarks.delete_all
end
end

26
app/services/search_service.rb

@ -38,37 +38,11 @@ class SearchService < BaseService
end
def perform_statuses_search!
return perform_statuses_search_chewy! if Chewy.enabled?
user_language = language_code_to_fts_locale(@account.user.locale)
search_results = Status.search_content(@query, user_language).select(&:searchable_by).slice(@offset, @limit)
search_results
end
def perform_statuses_search_chewy!
definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id }))
if @options[:account_id].present?
definition = definition.filter(term: { account_id: @options[:account_id] })
end
if @options[:min_id].present? || @options[:max_id].present?
range = {}
range[:gt] = @options[:min_id].to_i if @options[:min_id].present?
range[:lt] = @options[:max_id].to_i if @options[:max_id].present?
definition = definition.filter(range: { id: range })
end
results = definition.limit(@limit).offset(@offset).objects.compact
account_ids = results.map(&:account_id)
account_domains = results.map(&:account_domain)
preloaded_relations = relations_map_for_account(@account, account_ids, account_domains)
results.reject { |status| StatusFilter.new(status, @account, preloaded_relations).filtered? }
rescue Faraday::ConnectionFailed, Parslet::ParseFailed
[]
end
def perform_hashtags_search!
TagSearchService.new.call(
@query,

69
app/services/tag_search_service.rb

@ -7,80 +7,13 @@ class TagSearchService < BaseService
@limit = options.delete(:limit).to_i
@options = options
results = from_elasticsearch if Chewy.enabled?
results ||= from_database
results = from_database
results
end
private
def from_elasticsearch
query = {
function_score: {
query: {
multi_match: {
query: @query,
fields: %w(name.edge_ngram name),
type: 'most_fields',
operator: 'and',
},
},
functions: [
{
field_value_factor: {
field: 'usage',
modifier: 'log2p',
missing: 0,
},
},
{
gauss: {
last_status_at: {
scale: '7d',
offset: '14d',
decay: 0.5,
},
},
},
],
boost_mode: 'multiply',
},
}
filter = {
bool: {
should: [
{
term: {
reviewed: {
value: true,
},
},
},
{
match: {
name: {
query: @query,
},
},
},
],
},
}
definition = TagsIndex.query(query)
definition = definition.filter(filter) if @options[:exclude_unreviewed]
definition.limit(@limit).offset(@offset).objects.compact
rescue Faraday::ConnectionFailed, Parslet::ParseFailed
nil
end
def from_database
Tag.search_for(@query, @limit, @offset, @options)
end

34
boxfile.yml

@ -62,13 +62,6 @@ deploy.config:
web.web:
- bin/tootctl cache clear
- bundle exec rake db:migrate:setup
after_live:
worker.sidekiq:
- |-
if [[ "${ES_ENABLED}" != "false" ]]
then
bin/tootctl search deploy
fi
web.web:
@ -145,33 +138,6 @@ data.db:
curl -k -H "X-AUTH-TOKEN: ${WAREHOUSE_DATA_HOARDER_TOKEN}" https://${WAREHOUSE_DATA_HOARDER_HOST}:7410/blobs/${file} -X DELETE
done
data.elastic:
image: nanobox/elasticsearch:5
cron:
- id: backup
schedule: '0 3 * * *'
command: |
id=$(cat /proc/sys/kernel/random/uuid)
curl -X PUT -H "Content-Type: application/json" "127.0.0.1:9200/_snapshot/${id}" -d "{\"type\": \"fs\",\"settings\": {\"location\": \"/var/tmp/${id}\",\"compress\": true}}"
curl -X PUT -H "Content-Type: application/json" "127.0.0.1:9200/_snapshot/${id}/backup?wait_for_completion=true&pretty"
tar -cz -C "/var/tmp/${id}" . |
curl -k -H "X-AUTH-TOKEN: ${WAREHOUSE_DATA_HOARDER_TOKEN}" https://${WAREHOUSE_DATA_HOARDER_HOST}:7410/blobs/backup-${HOSTNAME}-$(date -u +%Y-%m-%d.%H-%M-%S).tgz -X POST -T - >&2
curl -X DELETE -H "Content-Type: application/json" "127.0.0.1:9200/_snapshot/${id}"
rm -rf "/var/tmp/${id}"
curl -k -s -H "X-AUTH-TOKEN: ${WAREHOUSE_DATA_HOARDER_TOKEN}" https://${WAREHOUSE_DATA_HOARDER_HOST}:7410/blobs/ |
sed 's/,/\n/g' |
grep ${HOSTNAME} |
sort |
head -n-${BACKUP_COUNT:-1} |
sed 's/.*: \?"\(.*\)".*/\1/' |
while read file
do
curl -k -H "X-AUTH-TOKEN: ${WAREHOUSE_DATA_HOARDER_TOKEN}" https://${WAREHOUSE_DATA_HOARDER_HOST}:7410/blobs/${file} -X DELETE
done
data.redis:
image: nanobox/redis:4.0

3
chart/Chart.lock

@ -1,7 +1,4 @@
dependencies:
- name: elasticsearch
repository: https://charts.bitnami.com/bitnami
version: 15.10.3
- name: postgresql
repository: https://charts.bitnami.com/bitnami
version: 8.10.14

4
chart/Chart.yaml

@ -23,10 +23,6 @@ version: 1.1.1
appVersion: 3.3.0
dependencies:
- name: elasticsearch
version: 15.10.3
repository: https://charts.bitnami.com/bitnami
condition: elasticsearch.enabled
- name: postgresql
version: 8.10.14
repository: https://charts.bitnami.com/bitnami

3
chart/templates/_helpers.tpl

@ -66,9 +66,6 @@ Create the name of the service account to use
Create a default fully qualified name for dependent services.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "mastodon.elasticsearch.fullname" -}}
{{- printf "%s-%s" .Release.Name "elasticsearch" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- define "mastodon.redis.fullname" -}}
{{- printf "%s-%s" .Release.Name "redis" | trunc 63 | trimSuffix "-" -}}

5
chart/templates/configmap-env.yaml

@ -15,11 +15,6 @@ data:
DB_PORT: "5432"
DB_USER: {{ .Values.postgresql.postgresqlUsername }}
DEFAULT_LOCALE: {{ .Values.mastodon.locale }}
{{- if .Values.elasticsearch.enabled }}
ES_ENABLED: "true"
ES_HOST: {{ template "mastodon.elasticsearch.fullname" . }}-master
ES_PORT: "9200"
{{- end }}
LOCAL_DOMAIN: {{ .Values.mastodon.local_domain }}
# https://devcenter.heroku.com/articles/tuning-glibc-memory-behavior
MALLOC_ARENA_MAX: "2"

79
chart/templates/job-chewy-upgrade.yaml

@ -1,79 +0,0 @@
{{- if .Values.elasticsearch.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "mastodon.fullname" . }}-chewy-upgrade
labels:
{{- include "mastodon.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
"helm.sh/hook-weight": "-1"
spec:
template:
metadata:
name: {{ include "mastodon.fullname" . }}-chewy-upgrade
spec:
restartPolicy: Never
{{- if (not .Values.mastodon.s3.enabled) }}
# ensure we run on the same node as the other rails components; only
# required when using PVCs that are ReadWriteOnce
{{- if or (eq "ReadWriteOnce" .Values.mastodon.persistence.assets.accessMode) (eq "ReadWriteOnce" .Values.mastodon.persistence.system.accessMode) }}
affinity:
podAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: component
operator: In
values:
- rails
topologyKey: kubernetes.io/hostname
{{- end }}
volumes:
- name: assets
persistentVolumeClaim:
claimName: {{ template "mastodon.fullname" . }}-assets
- name: system
persistentVolumeClaim:
claimName: {{ template "mastodon.fullname" . }}-system
{{- end }}
containers:
- name: {{ include "mastodon.fullname" . }}-chewy-setup
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- bundle
- exec
- rake
- chewy:upgrade
envFrom:
- configMapRef:
name: {{ include "mastodon.fullname" . }}-env
- secretRef:
name: {{ template "mastodon.fullname" . }}
env:
- name: "DB_PASS"
valueFrom:
secretKeyRef:
{{- if .Values.postgresql.enabled }}
name: {{ .Release.Name }}-postgresql
{{- else }}
name: {{ template "mastodon.fullname" . }}
{{- end }}
key: postgresql-password
- name: "REDIS_PASSWORD"
valueFrom:
secretKeyRef:
name: {{ .Release.Name }}-redis
key: redis-password
- name: "PORT"
value: {{ .Values.mastodon.web.port | quote }}
{{- if (not .Values.mastodon.s3.enabled) }}
volumeMounts:
- name: assets
mountPath: /opt/mastodon/public/assets
- name: system
mountPath: /opt/mastodon/public/system
{{- end }}
{{- end }}

13
chart/values.yaml

@ -102,19 +102,6 @@ ingress:
hosts:
- mastodon.local
# https://github.com/bitnami/charts/tree/master/bitnami/elasticsearch#parameters
elasticsearch:
# `false` will disable full-text search
#
# if you enable ES after the initial install, you will need to manually run
# RAILS_ENV=production bundle exec rake chewy:sync
# (https://docs.joinmastodon.org/admin/optional/elasticsearch/)
enabled: true
# may be removed once https://git.lrk.sh/comm.network/mastodon/pull/13828 is part
# of a tagged release
image:
tag: 6
# https://github.com/bitnami/charts/tree/master/bitnami/postgresql#parameters
postgresql:
# disable if you want to use an existing db; in which case the values below

1
config/application.rb

@ -38,7 +38,6 @@ require_relative '../lib/mastodon/snowflake'
require_relative '../lib/mastodon/version'
require_relative '../lib/devise/two_factor_ldap_authenticatable'
require_relative '../lib/devise/two_factor_pam_authenticatable'
require_relative '../lib/chewy/strategy/custom_sidekiq'
require_relative '../lib/webpacker/manifest_extensions'
require_relative '../lib/webpacker/helper_extensions'
require_relative '../lib/action_dispatch/cookie_jar_extensions'

39
config/initializers/chewy.rb

@ -1,39 +0,0 @@
enabled = ENV['ES_ENABLED'] == 'true'
host = ENV.fetch('ES_HOST') { 'localhost' }
port = ENV.fetch('ES_PORT') { 9200 }
user = ENV.fetch('ES_USER') { nil }
password = ENV.fetch('ES_PASS') { nil }
fallback_prefix = ENV.fetch('REDIS_NAMESPACE') { nil }
prefix = ENV.fetch('ES_PREFIX') { fallback_prefix }
Chewy.settings = {
host: "#{host}:#{port}",
prefix: prefix,
enabled: enabled,
journal: false,
user: user,
password: password,
sidekiq: { queue: 'pull' },
}
# We use our own async strategy even outside the request-response
# cycle, which takes care of checking if Elasticsearch is enabled
# or not. However, mind that for the Rails console, the :urgent
# strategy is set automatically with no way to override it.
Chewy.root_strategy = :custom_sidekiq
Chewy.request_strategy = :custom_sidekiq
Chewy.use_after_commit_callbacks = false
module Chewy
class << self
def enabled?
settings[:enabled]
end
end
end
# Elasticsearch uses Faraday internally. Faraday interprets the
# http_proxy env variable by default which leads to issues when
# Mastodon is run with hidden services enabled, because
# Elasticsearch is *not* supposed to be accessed through a proxy
Faraday.ignore_env_proxy = true

19
docker-compose.yml

@ -24,25 +24,6 @@ services:
volumes:
- ./redis:/data
# es:
# restart: always
# image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2
# environment:
# - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
# - "cluster.name=es-mastodon"
# - "discovery.type=single-node"
# - "bootstrap.memory_lock=true"
# networks:
# - internal_network
# healthcheck:
# test: ["CMD-SHELL", "curl --silent --fail localhost:9200/_cluster/health || exit 1"]
# volumes:
# - ./elasticsearch:/usr/share/elasticsearch/data
# ulimits:
# memlock:
# soft: -1
# hard: -1
web:
build: .
image: comm-network/mastodon

11
lib/chewy/strategy/custom_sidekiq.rb

@ -1,11 +0,0 @@
# frozen_string_literal: true
module Chewy
class Strategy
class CustomSidekiq < Sidekiq
def update(_type, _objects, _options = {})
super if Chewy.enabled?
end
end
end
end

1
lib/mastodon/cli_helper.rb

@ -7,7 +7,6 @@ ActiveRecord::Base.logger = dev_null
ActiveJob::Base.logger = dev_null
HttpLog.configuration.logger = dev_null
Paperclip.options[:log] = false
Chewy.logger = dev_null
module Mastodon
module CLIHelper

156
lib/mastodon/search_cli.rb

@ -1,156 +0,0 @@
# frozen_string_literal: true
require_relative '../../config/boot'
require_relative '../../config/environment'
require_relative 'cli_helper'
module Mastodon
class SearchCLI < Thor
include CLIHelper
# Indices are sorted by amount of data to be expected in each, so that
# smaller indices can go online sooner
INDICES = [
AccountsIndex,
TagsIndex,
StatusesIndex,
].freeze
option :concurrency, type: :numeric, default: 2, aliases: [:c], desc: 'Workload will be split between this number of threads'
option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
option :only, type: :array, enum: %w(accounts tags statuses), desc: 'Only process these indices'
desc 'deploy', 'Create or upgrade Elasticsearch indices and populate them'
long_desc <<~LONG_DESC
If Elasticsearch is empty, this command will create the necessary indices
and then import data from the database into those indices.
This command will also upgrade indices if the underlying schema has been
changed since the last run.
Even if creating or upgrading indices is not necessary, data from the
database will be imported into the indices.
LONG_DESC
def deploy
if options[:concurrency] < 1
say('Cannot run with this concurrency setting, must be at least 1', :red)
exit(1)
end
if options[:batch_size] < 1
say('Cannot run with this batch_size setting, must be at least 1', :red)
exit(1)
end
indices = begin
if options[:only]
options[:only].map { |str| "#{str.camelize}Index".constantize }
else
INDICES
end
end
progress = ProgressBar.create(total: nil, format: '%t%c/%u |%b%i| %e (%r docs/s)', autofinish: false)
# First, ensure all indices are created and have the correct
# structure, so that live data can already be written
indices.select { |index| index.specification.changed? }.each do |index|
progress.title = "Upgrading #{index} "
index.purge
index.specification.lock!
end
db_config = ActiveRecord::Base.configurations[Rails.env].dup
db_config['pool'] = options[:concurrency] + 1
ActiveRecord::Base.establish_connection(db_config)
pool = Concurrent::FixedThreadPool.new(options[:concurrency])
added = Concurrent::AtomicFixnum.new(0)
removed = Concurrent::AtomicFixnum.new(0)
progress.title = 'Estimating workload '
# Estimate the amount of data that has to be imported first
progress.total = indices.sum { |index| index.adapter.default_scope.count }
# Now import all the actual data. Mind that unlike chewy:sync, we don't
# fetch and compare all record IDs from the database and the index to
# find out which to add and which to remove from the index. Because with
# potentially millions of rows, the memory footprint of such a calculation
# is uneconomical. So we only ever add.
indices.each do |index|
progress.title = "Importing #{index} "
batch_size = options[:batch_size]
slice_size = (batch_size / options[:concurrency]).ceil
index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
futures = []
batch.each_slice(slice_size) do |records|
futures << Concurrent::Future.execute(executor: pool) do
begin
if !progress.total.nil? && progress.progress + records.size > progress.total
# The number of items has changed between start and now,
# since there is no good way to predict the final count from
# here, just change the progress bar to an indeterminate one
progress.total = nil
end
grouped_records = nil
bulk_body = nil
index_count = 0
delete_count = 0
ActiveRecord::Base.connection_pool.with_connection do
grouped_records = records.to_a.group_by do |record|
index.adapter.send(:delete_from_index?, record) ? :delete : :to_index
end
bulk_body = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
end
index_count = grouped_records[:to_index].size if grouped_records.key?(:to_index)
delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
# The following is an optimization for statuses specifically, since
# we want to de-index statuses that cannot be searched by anybody,
# but can't use Chewy's delete_if logic because it doesn't use
# crutches and our searchable_by logic depends on them
if index == StatusesIndex
bulk_body.map! do |entry|
if entry[:to_index] && entry.dig(:to_index, :data, 'searchable_by').blank?
index_count -= 1
delete_count += 1
{ delete: entry[:to_index].except(:data) }
else
entry
end
end
end
Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)
progress.progress += records.size
added.increment(index_count)
removed.increment(delete_count)
sleep 1
rescue => e
progress.log pastel.red("Error importing #{index}: #{e}")
end
end
end
futures.map(&:value)
end
end
progress.title = ''
progress.stop
say("Indexed #{added.value} records, de-indexed #{removed.value}", :green, true)
end
end
end

2
spec/rails_helper.rb

@ -12,7 +12,7 @@ require 'capybara/rspec'
Dir[Rails.root.join('spec/support/**/*.rb')].each { |f| require f }
ActiveRecord::Migration.maintain_test_schema!
WebMock.disable_net_connect!(allow: Chewy.settings[:host])
WebMock.disable_net_connect!
Redis.current = Redis::Namespace.new("mastodon_test#{ENV['TEST_ENV_NUMBER']}", redis: Redis.current)
Sidekiq::Testing.inline!
Sidekiq.logger = nil

1
spec/spec_helper.rb

@ -29,7 +29,6 @@ RSpec.configure do |config|
config.before :suite do
Rails.application.load_seed
Chewy.strategy(:bypass)
end
config.after :suite do

Loading…
Cancel
Save