Skip to content

Commit

Permalink
adds translation map generator for floor locations
Browse files Browse the repository at this point in the history
  • Loading branch information
niquerio committed Oct 3, 2024
1 parent 9377275 commit 87db44f
Show file tree
Hide file tree
Showing 7 changed files with 309 additions and 0 deletions.
1 change: 1 addition & 0 deletions umich_catalog_indexing/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ gem "sequel", "~>5.0"
gem "httpclient", "~>2.0"
gem "library_stdnums", "~>1.0"
gem "sidekiq"
gem "google-apis-sheets_v4"

platforms :jruby do
gem "naconormalizer"
Expand Down
37 changes: 37 additions & 0 deletions umich_catalog_indexing/Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ GEM
connection_pool (2.4.1)
crack (0.4.5)
rexml
declarative (0.0.20)
diff-lcs (1.5.0)
docile (1.4.0)
domain_name (0.6.20240107)
Expand All @@ -64,6 +65,25 @@ GEM
ffi-compiler (1.0.1)
ffi (>= 1.0.0)
rake
google-apis-core (0.15.1)
addressable (~> 2.5, >= 2.5.1)
googleauth (~> 1.9)
httpclient (>= 2.8.3, < 3.a)
mini_mime (~> 1.0)
mutex_m
representable (~> 3.0)
retriable (>= 2.0, < 4.a)
google-apis-sheets_v4 (0.36.0)
google-apis-core (>= 0.15.0, < 2.a)
google-cloud-env (2.2.0)
faraday (>= 1.0, < 3.a)
googleauth (1.11.0)
faraday (>= 1.0, < 3.a)
google-cloud-env (~> 2.1)
jwt (>= 1.4, < 3.0)
multi_json (~> 1.11)
os (>= 0.9, < 2.0)
signet (>= 0.16, < 2.a)
hashdiff (1.1.0)
hashie (5.0.0)
high_level_browse (1.1.1)
Expand All @@ -87,6 +107,8 @@ GEM
i18n (1.14.1)
concurrent-ruby (~> 1.0)
json (2.7.1)
jwt (2.9.1)
base64
language_server-protocol (3.17.0.3)
library_stdnums (1.6.0)
lint_roller (1.1.0)
Expand All @@ -103,6 +125,7 @@ GEM
method_source (1.0.0)
mini_mime (1.1.5)
minitest (5.22.2)
multi_json (1.15.0)
multi_xml (0.6.0)
mutex_m (0.2.0)
mysql2 (0.5.5)
Expand All @@ -120,6 +143,7 @@ GEM
racc (~> 1.4)
nokogiri (1.16.0-x86_64-linux)
racc (~> 1.4)
os (1.1.4)
parallel (1.24.0)
parser (3.3.0.5)
ast (~> 2.4.1)
Expand All @@ -136,6 +160,11 @@ GEM
redis-client (0.19.1)
connection_pool
regexp_parser (2.9.0)
representable (3.2.0)
declarative (< 0.1.0)
trailblazer-option (>= 0.1.1, < 0.2.0)
uber (< 0.2.0)
retriable (3.1.2)
rexml (3.2.6)
rspec (3.12.0)
rspec-core (~> 3.12.0)
Expand Down Expand Up @@ -178,6 +207,11 @@ GEM
connection_pool (>= 2.3.0)
rack (>= 2.2.4)
redis-client (>= 0.14.0)
signet (0.19.0)
addressable (~> 2.8)
faraday (>= 0.17.5, < 3.a)
jwt (>= 1.5, < 3.0)
multi_json (~> 1.10)
simplecov (0.22.0)
docile (~> 1.1)
simplecov-html (~> 0.11)
Expand All @@ -203,6 +237,7 @@ GEM
standard-performance (1.3.1)
lint_roller (~> 1.1)
rubocop-performance (~> 1.20.2)
trailblazer-option (0.1.2)
traject (3.8.2)
concurrent-ruby (>= 0.8.0)
dot-properties (>= 0.1.1)
Expand All @@ -216,6 +251,7 @@ GEM
yell
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
uber (0.1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.9.1)
Expand All @@ -242,6 +278,7 @@ DEPENDENCIES
bundler (~> 2.0)
byebug
canister
google-apis-sheets_v4
high_level_browse (>= 1.1)
httparty
httpclient (~> 2.0)
Expand Down
2 changes: 2 additions & 0 deletions umich_catalog_indexing/env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ ALMA_API_KEY='YOUR_ALMA_API_KEY'
NODB=1
SUPERVISOR_ON='true'
SUBJECT_HEADING_REMEDIATION_SET_ID="YOUR_SET_ID"
FLOOR_LOCATION_SPREADSHEET_ID="your_spreadsheet_id"
GOOGLE_API_CREDENTIALS="your_api_credentials_json_string_for_floor_location_spreadsheet"
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ def write_to_file(path)
require_relative "translation_map_generator/lib_loc_info"
require_relative "translation_map_generator/high_level_browse"
require_relative "translation_map_generator/subject_heading_remediation"
require_relative "translation_map_generator/floor_locations"
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
require "googleauth"
require "google/apis/sheets_v4"

module Jobs
module TranslationMapGenerator
module FloorLocations
class << self
  include FileWriter

  # @return [String] name of the translation map
  def name
    "Floor Locations"
  end

  # @return [String] where in the translation map directory the file
  #   should go
  def file_path
    File.join("umich", "floor_locations.json")
  end

  # @return [String] JSON string of the translation map (the output of
  #   #fetch serialized with to_json)
  def generate
    fetch.to_json
  end

  # Reads the floor-location spreadsheet through the Google Sheets API,
  # using read-only service-account credentials taken from
  # S.google_api_credentials, and turns the rows into the nested
  # translation-map structure.
  #
  # @param range [String] A1-notation range to read. The default starts at
  #   row 2 and stops at row 100, so rows past 100 are not read unless a
  #   wider range is passed.
  # @return [Hash{String => Hash{String => Array<Hash>}}] see
  #   #_generate_data_structure
  def fetch(range: "Sheet1!A2:G100")
    client = Google::Apis::SheetsV4::SheetsService.new
    client.authorization = Google::Auth::ServiceAccountCredentials.make_creds(
      json_key_io: StringIO.new(S.google_api_credentials),
      scope: "https://www.googleapis.com/auth/spreadsheets.readonly"
    )
    data = client.get_spreadsheet_values(S.floor_location_spreadsheet_id, range).values
    _generate_data_structure(data)
  end

  # Groups spreadsheet rows by library and then by location.
  #
  # @param data [Array<Array<String>>, nil] rows from the spreadsheet; nil
  #   is treated as "no rows" so an empty range yields an empty map instead
  #   of raising.
  # @return [Hash{String => Hash{String => Array<Hash>}}] library =>
  #   location => list of FloorLocation#to_h hashes, in row order
  def _generate_data_structure(data)
    Array(data).each_with_object({}) do |row, output|
      # Rows without a first cell (including completely empty rows) are skipped.
      next if row[0].nil?

      fl = FloorLocation.new(row)
      by_location = (output[fl.library] ||= {})
      (by_location[fl.location] ||= []).push(fl.to_h)
    end
  end
end

# One row of the floor-location spreadsheet, normalized into the values
# that go into the translation map.
#
# Cells read from the row (0-based index): 0 library, 1 location,
# 2 call number range, 5 floor code, 6 display text. Indexes 3 and 4 are
# not read by this class.
class FloorLocation
  attr_reader :library, :location, :text, :code

  # @param data [Array<String, nil>] one spreadsheet row. Rows may be
  #   shorter than seven cells (trailing empty cells can be missing);
  #   missing cells are treated as empty strings.
  def initialize(data)
    @library = _cell(data, 0)
    @location = _cell(data, 1)
    @call_number_range = _clean_call_number_range(data[2])
    @code = _cell(data, 5)
    @text = _cell(data, 6)
    @data = data # kept as in the original; not read anywhere in this class
  end

  # @return [nil, Float, String] lower bound of the range: nil for
  #   "Everything", a Float for "Dewey", otherwise the normalized LC call
  #   number with all whitespace removed
  def call_number_start
    return nil if type == "Everything"

    cn = @call_number_range[0]
    return cn.to_f if type == "Dewey"

    _normalize_lc_call_number(cn).gsub(/\s+/, "")
  end

  # Upper bound of the range. When the range has a single call number it is
  # used for both bounds.
  #
  # @return [nil, Float, String] nil for "Everything"; for "Dewey" the
  #   number padded with 9999 so the whole sub-range is included; otherwise
  #   the normalized LC call number with "z" appended and whitespace removed
  def call_number_end
    return nil if type == "Everything"

    cn = @call_number_range[1] || @call_number_range[0]
    if type == "Dewey"
      # "330" -> 330.9999, "330.5" -> 330.59999. Appending ".9999" to a
      # number that already has a decimal point would be ignored by to_f.
      suffix = cn.include?(".") ? "9999" : ".9999"
      return (cn + suffix).to_f
    end

    (_normalize_lc_call_number(cn) + "z").gsub(/\s+/, "")
  end

  # @return [String] "Everything" when no call number range is given,
  #   "Dewey" when the range starts with a digit, "LC" otherwise
  def type
    if @call_number_range.empty?
      "Everything"
    elsif @call_number_range[0].match?(/^\d/)
      "Dewey"
    else
      "LC"
    end
  end

  # @return [Hash{String => Object}] the translation map entry for this row
  def to_h
    {
      "library" => library,
      "collection" => location,
      "start" => call_number_start,
      "end" => call_number_end,
      "floor_key" => code,
      "text" => text,
      "type" => type
    }
  end

  # @param data [Array] spreadsheet row
  # @param index [Integer] cell index
  # @return [String] the stripped cell, or "" when the cell is missing
  def _cell(data, index)
    data[index].to_s.strip
  end

  # Splits a "start - end" cell into lower-cased, stripped parts. The
  # separator is a hyphen with whitespace on both sides. A missing or blank
  # cell gives an empty array.
  #
  # @param str [String, nil]
  # @return [Array<String>]
  def _clean_call_number_range(str)
    str.to_s.strip.downcase.split(/\s+-\s+/).map(&:strip)
  end

  # Applies the first matching LC normalization (music, Asia, trailing
  # digit) or returns the call number unchanged.
  #
  # @param cn [String] lower-cased call number
  # @return [String]
  def _normalize_lc_call_number(cn)
    if _music_match?(cn)
      _music_call_number(cn)
    elsif _asia_match?(cn)
      _asia_call_number(cn)
    elsif _number_ending?(cn)
      _number_ending_call_number(cn)
    else
      cn
    end
  end

  def _music_match?(cn)
    library == "MUSIC" && cn.match?(/^m/)
  end

  def _music_call_number(cn)
    parts = cn.split(/\s+/)
    "#{parts[0]}000#{parts[1]}.00000#{parts[2]}"
  end

  # NOTE(review): the String "." is converted to a Regexp, so this matches
  # any non-empty call number, not only ones containing a literal dot.
  # Behaviour is left unchanged here; confirm which was intended.
  def _asia_match?(cn)
    location == "ASIA" && cn.match?(".")
  end

  def _asia_call_number(cn)
    parts = cn.split(/\s+/)
    "#{parts[0]}0#{parts[1]}000"
  end

  def _number_ending?(cn)
    cn.match?(/\d$/)
  end

  def _number_ending_call_number(cn)
    cn + ".00000"
  end
end
end
end
end
3 changes: 3 additions & 0 deletions umich_catalog_indexing/lib/services.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

S.register(:app_env) { ENV["APP_ENV"] || "development" }

# Settings for the floor locations translation map generator.
# google_api_credentials is the service-account key as a JSON string (the
# generator wraps it in a StringIO and passes it as json_key_io); it falls
# back to "{}" when GOOGLE_API_CREDENTIALS is unset.
S.register(:google_api_credentials) { ENV["GOOGLE_API_CREDENTIALS"] || "{}" }
# ID of the spreadsheet the generator reads; falls back to "" when
# FLOOR_LOCATION_SPREADSHEET_ID is unset.
S.register(:floor_location_spreadsheet_id) { ENV["FLOOR_LOCATION_SPREADSHEET_ID"] || "" }

require_relative "services/paths"
require_relative "services/logger"
require_relative "services/dbs"
Expand Down
Loading

0 comments on commit 87db44f

Please sign in to comment.