-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adds translation map generator for sh remediation
"Translation map" is a bit of a misnomer here, since this is more than a one-to-one mapping. It is only similar in that it is configuration generated before indexing begins.
- Loading branch information
Showing
11 changed files
with
721 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
ALMA_API_KEY='YOUR_ALMA_API_KEY' | ||
NODB=1 | ||
SUPERVISOR_ON='true' | ||
SUBJECT_HEADING_REMEDIATION_SET_ID="YOUR_SET_ID" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
89 changes: 89 additions & 0 deletions
89
umich_catalog_indexing/lib/jobs/translation_map_generator/subject_heading_remediation.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
module Jobs | ||
module TranslationMapGenerator | ||
module SubjectHeadingRemediation | ||
class << self | ||
include FileWriter | ||
def name | ||
"Subject Headings mapping" | ||
end | ||
|
||
# @returns [String] where in the translation map directory the file | ||
# should go | ||
def file_path | ||
File.join("umich", "subject_heading_remediation.json") | ||
end | ||
|
||
# @returns [String] JSON string of mapping | ||
def generate | ||
JSON.pretty_generate(Set.for(S.subject_heading_remediation_set_id).to_a) | ||
end | ||
end | ||
|
||
class Set | ||
def self.for(id) | ||
resp = AlmaRestClient.client.get_all(url: "conf/sets/#{id}/members", record_key: "members") | ||
raise StandardError, "Couldn't retrieve authority set data for #{id}" if resp.status != 200 | ||
new(resp.body) | ||
end | ||
|
||
def initialize(data) | ||
@data = data | ||
end | ||
|
||
def ids | ||
@data["member"].map { |x| x["id"] } | ||
end | ||
|
||
def authority_records | ||
ids.map do |id| | ||
Authority.for(id) | ||
end | ||
end | ||
|
||
def to_a | ||
authority_records.map { |x| x.to_h } | ||
end | ||
end | ||
|
||
class Authority | ||
SUBFIELDS = ["a", "v", "x", "y", "z"] | ||
def self.for(authority_record_id) | ||
resp = AlmaRestClient.client.get("bibs/authorities/#{authority_record_id}", query: {view: "full"}) | ||
raise StandardError, "Couldn't retrieve authority data for #{authority_record_id}" if resp.status != 200 | ||
new(resp.body) | ||
end | ||
|
||
def initialize(data) | ||
@record = MARC::XMLReader.new(StringIO.new(data["anies"]&.first)).first | ||
end | ||
|
||
def remediated_term | ||
out_hash = Hash.new { |h, key| h[key] = [] } | ||
@record.fields("150").first.subfields.each do |sf| | ||
out_hash[sf.code].push(sf.value) if SUBFIELDS.include?(sf.code) | ||
end | ||
out_hash | ||
end | ||
|
||
def deprecated_terms | ||
@record.fields("450").map do |field| | ||
out_hash = Hash.new { |h, key| h[key] = [] } | ||
field.subfields.each do |sf| | ||
out_hash[sf.code].push(sf.value) if SUBFIELDS.include?(sf.code) | ||
end | ||
out_hash | ||
end.sort do |a, b| | ||
b.keys.count <=> a.keys.count | ||
end | ||
end | ||
|
||
def to_h | ||
{ | ||
"150" => remediated_term, | ||
"450" => deprecated_terms | ||
} | ||
end | ||
end | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
umich_catalog_indexing/spec/fixtures/subjects/authority_record.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"mms_id": 98187481368106380, | ||
"record_format": "marc21_authority", | ||
"title": "Undocumented immigrants", | ||
"created_by": "System", | ||
"created_date": "2021-11-17Z", | ||
"last_modified_by": "rednaal", | ||
"last_modified_date": "2023-03-29Z", | ||
"originating_system": "LIBRARY_OF_CONGRESS", | ||
"originating_system_id": "98184898010106381", | ||
"cataloging_level": { | ||
"value": "00", | ||
"desc": "Default Level" | ||
}, | ||
"vocabulary": { | ||
"value": "MIUSH", | ||
"desc": "miush" | ||
}, | ||
"anies": [ | ||
"<?xml version=\"1.0\" encoding=\"UTF-16\"?><record><leader>01200cz a2200301n 4500</leader><controlfield tag=\"005\">20230329130030.0</controlfield><controlfield tag=\"008\">030627i| anannbabn |a ana </controlfield><controlfield tag=\"001\">98187481368106381</controlfield><datafield ind1=\" \" ind2=\" \" tag=\"010\"><subfield code=\"a\">sh 85003553</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"035\"><subfield code=\"a\">(DLC)sh 85003553</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"035\"><subfield code=\"a\">(LIBRARY_OF_CONGRESS)98171057700000041</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"040\"><subfield code=\"a\">DLC</subfield><subfield code=\"c\">DLC</subfield><subfield code=\"d\">DLC</subfield><subfield code=\"d\">WaU</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"150\"><subfield code=\"a\">Undocumented immigrants</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Undocumented foreign nationals</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Illegal aliens</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Aliens</subfield><subfield code=\"x\">Legal status, laws, etc.</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"w\">nne</subfield><subfield code=\"a\">Aliens, Illegal</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Illegal aliens</subfield><subfield code=\"x\">Legal status, laws, etc.</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Illegal immigrants</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Undocumented noncitizens</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"550\"><subfield code=\"w\">g</subfield><subfield code=\"a\">Aliens</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"550\"><subfield 
code=\"a\">Immigrant detention centers</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"550\"><subfield code=\"a\">Human smuggling</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"550\"><subfield code=\"a\">Noncitizens</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"550\"><subfield code=\"a\">Illegal immigration</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"670\"><subfield code=\"a\">Work cat.: 2007017970: Illegal immigration, 2007:</subfield><subfield code=\"b\">eCIP data sheet (Illegal immigrants)</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"690\"><subfield code=\"a\">sla-lab updated based on DEIA Catalog Working Group changes May 2021; \"Undocumented immigrants\" term borrowed from Sears ; \"undocumented foreign national\" term from Bill H.R. 3776 (116th Congress)</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"690\"><subfield code=\"a\">sla-lab updated to include 550s for LCSH headings as references March 2023</subfield></datafield></record>" | ||
], | ||
"link": "https://api-na.hosted.exlibrisgroup.com/almaws/v1/bibs/authorities/98187481368106381" | ||
} |
23 changes: 23 additions & 0 deletions
23
umich_catalog_indexing/spec/fixtures/subjects/authority_record2.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"mms_id": 999, | ||
"record_format": "marc21_authority", | ||
"title": "Whatever", | ||
"created_by": "System", | ||
"created_date": "2021-11-17Z", | ||
"last_modified_by": "rednaal", | ||
"last_modified_date": "2023-03-29Z", | ||
"originating_system": "LIBRARY_OF_CONGRESS", | ||
"originating_system_id": "98184898010106381", | ||
"cataloging_level": { | ||
"value": "00", | ||
"desc": "Default Level" | ||
}, | ||
"vocabulary": { | ||
"value": "MIUSH", | ||
"desc": "miush" | ||
}, | ||
"anies": [ | ||
"<?xml version=\"1.0\" encoding=\"UTF-16\"?><record><leader>01200cz a2200301n 4500</leader><controlfield tag=\"005\">20230329130030.0</controlfield><controlfield tag=\"008\">030627i| anannbabn |a ana </controlfield><controlfield tag=\"001\">98187481368106381</controlfield><datafield ind1=\" \" ind2=\" \" tag=\"010\"><subfield code=\"a\">sh 85003553</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"035\"><subfield code=\"a\">(DLC)sh 85003553</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"035\"><subfield code=\"a\">(LIBRARY_OF_CONGRESS)98171057700000041</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"040\"><subfield code=\"a\">DLC</subfield><subfield code=\"c\">DLC</subfield><subfield code=\"d\">DLC</subfield><subfield code=\"d\">WaU</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"150\"><subfield code=\"a\">Whatever</subfield><subfield code=\"x\">First x field</subfield><subfield code=\"x\">Second x field</subfield><subfield code=\"v\">First v field</subfield><subfield code=\"v\">Second v field</subfield><subfield code=\"y\">First y field</subfield><subfield code=\"y\">Second y field</subfield><subfield code=\"z\">First z field</subfield><subfield code=\"z\">Second z field</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"450\"><subfield code=\"a\">Stuff</subfield><subfield code=\"x\">First deprecated x field</subfield><subfield code=\"x\">Second deprecated x field</subfield><subfield code=\"v\">First deprecated v field</subfield><subfield code=\"v\">Second deprecated v field</subfield><subfield code=\"y\">First deprecated y field</subfield><subfield code=\"y\">Second deprecated y field</subfield><subfield code=\"z\">First deprecated z field</subfield><subfield code=\"z\">Second deprecated z field</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"670\"><subfield code=\"a\">Work cat.: 2007017970: Illegal immigration, 2007:</subfield><subfield code=\"b\">eCIP data sheet (Illegal 
immigrants)</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"690\"><subfield code=\"a\">sla-lab updated based on DEIA Catalog Working Group changes May 2021; \"Whatever\" term borrowed from Sears ; \"undocumented foreign national\" term from Bill H.R. 3776 (116th Congress)</subfield></datafield><datafield ind1=\" \" ind2=\" \" tag=\"690\"><subfield code=\"a\">sla-lab updated to include 550s for LCSH headings as references March 2023</subfield></datafield></record>" | ||
], | ||
"link": "https://api-na.hosted.exlibrisgroup.com/almaws/v1/bibs/authorities/98187481368106381" | ||
} |
9 changes: 9 additions & 0 deletions
9
umich_catalog_indexing/spec/fixtures/subjects/authority_set.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"member": [ | ||
{ | ||
"id": "98187481368106381", | ||
"description": "Undocumented immigrants" | ||
} | ||
], | ||
"total_record_count": 1 | ||
} |
Oops, something went wrong.