Skip to content

Commit

Permalink
Merge pull request #204 from mlibrary/build_suggesters_one_at_a_time
Browse files Browse the repository at this point in the history
Build suggesters "manually" against each solr replica
  • Loading branch information
billdueber authored Sep 19, 2024
2 parents de692f0 + 6658b85 commit b9a9a71
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ gem "middle_english_dictionary", git: "https://github.com/mlibrary/middle_englis
gem "pg"
gem "date_named_file"
gem "zinzout"
gem "solr_cloud-connection"
gem "solr_cloud-connection", ">=0.6.0"

gem "shrine", "~> 3.6"
gem "aws-sdk-s3", "~> 1.160"
Expand Down
8 changes: 4 additions & 4 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ GEM
hashie (5.0.0)
html_truncator (0.4.2)
nokogiri (~> 1.5)
http-2 (1.0.0)
http-2 (1.0.1)
http (5.1.1)
addressable (~> 2.8)
http-cookie (~> 1.0)
Expand All @@ -159,7 +159,7 @@ GEM
domain_name (~> 0.5)
http-form_data (2.3.0)
httpclient (2.8.3)
httpx (1.3.0)
httpx (1.3.1)
http-2 (>= 1.0.0)
i18n (1.12.0)
concurrent-ruby (~> 1.0)
Expand Down Expand Up @@ -377,7 +377,7 @@ GEM
simplecov-html (0.12.3)
simplecov_json_formatter (0.1.4)
slop (4.10.1)
solr_cloud-connection (0.5.0)
solr_cloud-connection (0.6.0)
faraday (~> 2.0)
httpx (~> 1.0)
rubyzip (~> 2.0)
Expand Down Expand Up @@ -493,7 +493,7 @@ DEPENDENCIES
simple_form (~> 5.0)
simple_solr_client
simplecov
solr_cloud-connection
solr_cloud-connection (>= 0.6.0)
sprockets (~> 3.7.2)
sqlite3 (~> 1.3.13)
standard
Expand Down
5 changes: 4 additions & 1 deletion compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ services:
- ALLOW_ADMIN_ACCESS=1
- RAILS_RELATIVE_URL_ROOT=/m/middle-english-dictionary
- TZ=America/New_York
- "DIRECT_URLS_TO_SOLR_REPLICAS=http://solr:8983 http://solr:8983"
- MANUALLY_BUILD_SUGGESTERS=true
- PAUSE_TIME=10
env_file:
- path: .app.env
required: false
Expand All @@ -53,7 +56,7 @@ services:
solr:
build: solr/.
ports:
- "9090:8983"
- "9172:8983"
environment:
- ZK_HOST=zoo:2181
depends_on:
Expand Down
13 changes: 13 additions & 0 deletions lib/dromedary/services.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ module Dromedary

################ Reindexing stuff ################

# For "manually" building suggesters using direct links to cluster replicas, because
# the cluster was losing nodes and the suggester indexes weren't getting built.

# Raw value of DIRECT_URLS_TO_SOLR_REPLICAS from the environment; nil when unset.
Services.register(:direct_urls_to_solr_replicas) do
  ENV.fetch("DIRECT_URLS_TO_SOLR_REPLICAS", nil)
end

# Whether suggesters should be built "manually" against each solr replica.
# True for any non-blank MANUALLY_BUILD_SUGGESTERS value other than "false" or "0"
# (case-insensitive, surrounding whitespace ignored); false when unset or blank.
Services.register(:manually_build_suggesters) do
  # ENV values are always Strings (or nil), so normalize once and compare as text.
  val = ENV["MANUALLY_BUILD_SUGGESTERS"].to_s.strip.downcase
  !val.empty? && !%w[false 0].include?(val)
end


Services.register(:build_root) do
br = Pathname.new(ENV["BUILD_ROOT"])
br.mkpath
Expand Down
84 changes: 76 additions & 8 deletions lib/med_installer/indexing_steps.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
require "med_installer/hyp_to_bibid"
require "solr_cloud/connection"
require "traject"
require "yaml"

module MedInstaller
# Run a complete indexing workflow, including
Expand Down Expand Up @@ -40,10 +41,45 @@ def initialize(zipfile:,
@connection = connection
@zipfile = zipfile
@coll_and_configset_name = Services[:name_of_solr_collection_to_index_into]

end

def index

# Do some basic checks against the solr

url = Dromedary::Services[:solr_url]
connection_url = @connection.url
logger.info "Trying to connect to #{Dromedary::Services[:solr_url]}"
logger.info "Connection thinks its url is #{@connection.url}"
logger.debug "System: #{@connection.system.to_yaml}\n\n"

# We were dealing with some instabilities in the k8s cluster in 2024.09 when we wanted
# to release this, resulting in nodes losing track of zookeeper and suggesters not getting
# built (calls would time out). Attempt to brute-force this by hitting the individual
# solr URLs directly, taking advantage of knowledge about how many replicas and what
# they're called that we'd really rather not have to know.
#
# If things are working as planned, we can just make the single call to
# `rebuild_suggesters` and accept the default connection and call it a day.
#
# To run it once for each configured solr replica, we need the following:
# * ENV[DIRECT_URLS_TO_SOLR_REPLICAS]: A space-delimited set of urls of the form
# "http://solr-solrcloud-1:8083" or whatever. This is the same format as the
# generic (cluster-level) connection string found in ENV[SOLR_URL]
# * A non-falsey value for ENV[MANUALLY_BUILD_SUGGESTERS] to enable it.
# @dueberb 2024.09.17

logger.info "Checking to see if we should try to build suggesters on each solr replica individually"
direct_urls_string = Services[:direct_urls_to_solr_replicas]
if direct_replica_urls and Services[:manually_build_suggesters]
logger.info "Will target #{direct_replica_urls.count} replicas for 'manual' builds of suggester index:"
direct_replica_urls.each do |u|
logger.info "- '#{u}'"
end
else
logger.info "Nope. Will just target the single logical solr url"
end

prepare_build_directory
extract_zip_to_build_directory
verify_unzipped_files!
Expand All @@ -69,7 +105,26 @@ def index
index_bibs(solr_url: collection_url)

@build_collection.commit
rebuild_suggesters

if direct_replica_urls and Services[:manually_build_suggesters]
urls = direct_replica_urls
pause_time = (ENV["PAUSE_TIME"] || 60).to_i
half_pause_time = pause_time / 2
logger.info "Sleeping for #{pause_time} seconds so things can crash and restart if that's what they're doing."
sleep half_pause_time # Let whatever restarts are going to happen, happen.
logger.info "...#{half_pause_time}"
sleep (pause_time - half_pause_time)
logger.info "...#{pause_time}"
urls.each do |direct_url|
logger.info "Rebuild suggesters at '#{direct_url}'"
conn = SolrCloud::Connection.new(url: direct_url, user: @connection.user, password: @connection.password)
rebuild_suggesters(connection: conn)
end
else
logger.info "Rebuilding suggesters against just the default #{@connection}"
rebuild_suggesters # This is the "happy path" if the k8s cluster is behaving
end

@build_collection.commit

logger.info "Cleaning up: remove temporary files"
Expand Down Expand Up @@ -160,14 +215,20 @@ def index_bibs(solr_url:)

# Rebuild the suggesters that provide autocomplete/typeahead functionality.
# Reads config/autocomplete.yml (run through ERB) under the root directory and, for
# each suggester configured for the given environment, sends a "suggest.build" request.
# @param rails_env [String] "production" or "development"; top-level key in autocomplete.yml
# @param collection_name [String] solr collection whose suggesters get rebuilt
# @param connection [#get, #url] connection the build requests are sent through; pass a
#   connection to a single replica to build that replica's suggesters directly
# @raise [KeyError] if autocomplete.yml has no entry for rails_env
# @raise [RuntimeError] if any build request fails
def rebuild_suggesters(rails_env: (ENV["RAILS_ENV"] || "production"),
  collection_name: @build_collection.name,
  connection: @connection)
  logger.info "Recreating suggest indexes for #{collection_name}"
  autocomplete_filename = Services[:root_directory] + "config" + "autocomplete.yml"
  autocomplete_yaml = ERB.new(File.read(autocomplete_filename)).result
  # fetch (rather than []) so an unconfigured environment fails with a clear KeyError
  # instead of a NoMethodError on nil further down.
  autocomplete_map = YAML.safe_load(autocomplete_yaml, aliases: true).fetch(rails_env)
  autocomplete_map.each do |suggester_name, suggester_config|
    suggester_path = suggester_config["solr_endpoint"]
    logger.info " Recreate suggester for #{suggester_name} in #{collection_name} at #{connection.url}"
    begin
      connection.get "solr/#{collection_name}/#{suggester_path}", { "suggest.build" => "true" }
    rescue => e
      raise "Error trying to build suggester : #{e.message}"
    end
  end
end

Expand All @@ -176,6 +237,13 @@ def upload_hyp_to_bibid_to_solr
filepath = Pathname.new(@build_dir) + "hyp_to_bibid.json"
MedInstaller::HypToBibId.dump_file_to_solr(collection: @build_collection, filename: filepath.to_s)
end

# Split the configured replica-URL string on whitespace.
# @return [Array<String>, nil] the individual URLs, or nil when the setting is
#   missing or contains only whitespace
def direct_replica_urls
  configured = Services[:direct_urls_to_solr_replicas]
  return nil unless configured && configured =~ /\S/

  configured.split(/\s+/).map(&:strip).reject(&:empty?)
end

end
end

0 comments on commit b9a9a71

Please sign in to comment.