Skip to content

Commit

Permalink
Merge pull request #63 from hathitrust/DEV-973-check-correct-journal
Browse files Browse the repository at this point in the history
DEV-973: check correct journal file for continue
  • Loading branch information
aelkiss authored Dec 17, 2024
2 parents f73dca4 + d8839d2 commit 80237bd
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ WORKDIR /app

# USER $UNAME
ENV BUNDLE_PATH /gems
RUN gem install bundler
RUN gem install bundler --version "~> 2.5.23"

FROM base AS development

Expand Down
2 changes: 1 addition & 1 deletion lib/cictl/index_command.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def continue
# If there is a missing journal, start indexing from that point.
else
(last_full.to_datetime.to_date..(Date.today - 1)).each do |date|
journal = Journal.new(date: last_full.to_datetime.to_date, full: false)
journal = Journal.new(date: date, full: false)
if journal.missing?
logger.info "missing update journal #{journal}, calling `cictl since #{journal.date}`"
call_since_command(journal.date)
Expand Down
33 changes: 28 additions & 5 deletions spec/cictl/index_command_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ def metrics?
metrics.match?(/^job_records_processed\S*job="#{job_name}"\S* \S+/m)
end

# Counts the distinct record ids referenced by a collection of examples.
#
# The same record can appear in more than one example (for instance when it
# is updated several times), so ids are de-duplicated before counting.
#
# @param examples [Enumerable<Hash>] examples, each exposing its ids under :ids
# @return [Integer] number of unique ids across all examples
def unique_id_count(examples)
  ids_per_example = examples.map { |example| example[:ids] }
  ids_per_example.flatten.uniq.size
end

around(:each) do |example|
job_name = HathiTrust::Services[:job_name]
Faraday.delete("#{ENV["PUSHGATEWAY"]}/metrics/job/#{job_name}")
Expand All @@ -40,12 +49,10 @@ def metrics?
CICTL::Examples.journal_for(example: ex).write!
end
update_file_count = CICTL::Examples.of_type(:upd).count
update_ids = CICTL::Examples.of_type(:upd, :delete).each_with_object([]) do |ex, ids|
ex[:ids].each { |id| ids << id }
end.uniq
update_id_count = unique_id_count(CICTL::Examples.of_type(:upd, :delete))
old_journal_count = Dir.children(HathiTrust::Services[:journal_directory]).count
CICTL::Commands.start(["index", "continue", "--quiet", "--log", test_log])
expect(solr_count).to eq update_ids.count
expect(solr_count).to eq update_id_count
expect(Dir.children(HathiTrust::Services[:journal_directory]).count).to eq(old_journal_count + update_file_count)
expect(metrics?).to eq true
end
Expand All @@ -63,6 +70,22 @@ def metrics?
expect(metrics?).to eq true
end
end

context "with one day to process" do
  it "indexes it and writes the journal" do
    pending_date = "20230103"

    # The update/delete examples for the one date that is left without a journal.
    expected_examples = CICTL::Examples.of_type(:upd, :delete).select { |ex| ex[:date] == pending_date }

    # Write a journal for every full/update example except those on the
    # pending date, leaving a single day for `continue` to pick up.
    CICTL::Examples.of_type(:full, :upd).each do |ex|
      next if ex[:date] == pending_date

      CICTL::Examples.journal_for(example: ex).write!
    end

    CICTL::Commands.start(["index", "continue", "--quiet", "--log", test_log])

    expect(solr_count).to eq unique_id_count(expected_examples)
    journal_path = File.join(HathiTrust::Services[:journal_directory], "hathitrust_catalog_indexer_journal_upd_20230103.txt")
    expect(File.exist?(journal_path)).to be(true)
  end
end
end

describe "#index all" do
Expand Down Expand Up @@ -119,7 +142,7 @@ def metrics?
it "indexes full records and deleted record from example date" do
examples = CICTL::Examples.for_date("20230103")
CICTL::Commands.start(["index", "date", "20230103", "--log", test_log])
expect(solr_count).to eq examples.map { |ex| ex[:ids] }.flatten.uniq.count
expect(solr_count).to eq unique_id_count(examples)
expect(File.exist?(CICTL::Journal.new(date: Date.new(2023, 1, 3)).path)).to eq(true)
expect(metrics?).to eq true
end
Expand Down

0 comments on commit 80237bd

Please sign in to comment.