Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Libsearch 944 language codes in 041 #95

Merged
merged 2 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion umich_catalog_indexing/indexers/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,37 @@ def ordinalize_incomplete_year(s)
# mrio: updated Feb 2022 to add "b"
to_field "edition", extract_marc("250ab")

to_field "language", marc_languages("008[35-37]:041a:041d:041e:041j")
# Reimplementation of traject's marc_languages macro that skips any 041 field
# carrying a subfield 2. NOTE(review): in MARC 21, 041 $2 names the source of a
# non-MARC language code list, so those codes presumably would not translate
# through the marc_languages map -- confirm that is the intent of the skip.
#
# Language codes come from 008 bytes 35-37 and 041 subfields a, d, e and j;
# they are de-duplicated and translated to display names.
to_field "language" do |record, accumulator, _context|
  translation_map = Traject::TranslationMap.new("marc_languages")

  # `cached` reuses one extractor instead of re-parsing the spec for every record.
  extractor = MarcExtractor.cached("008[35-37]:041a:041d:041e:041j", separator: nil)

  codes = extractor.collect_matching_lines(record) do |field, field_spec, line_extractor|
    if line_extractor.control_field?(field)
      field_spec.bytes ? field.value.byteslice(field_spec.bytes) : field.value
    elsif field["2"]
      # Contribute nothing for this field (an explicit empty list rather than
      # an implicit nil that would have to be filtered out downstream).
      []
    else
      line_extractor.collect_subfields(field, field_spec).flat_map do |value|
        # sometimes multiple language codes are jammed together in one subfield, and
        # we need to separate ourselves. sigh.
        # split into an array of 3-length substrs; JRuby has problems with regexes
        # across threads, which is why we don't use String#scan here.
        value.length == 3 ? value : value.chars.each_slice(3).map(&:join)
      end
    end
  end
  codes = codes.uniq

  translation_map.translate_array!(codes)

  accumulator.concat(codes)
end

to_field "language008", extract_marc("008[35-37]", first: true) do |r, acc|
acc.reject! { |x| x !~ /\S/ } # ditch only spaces
Expand Down
3 changes: 0 additions & 3 deletions umich_catalog_indexing/spec/common/subject/subject_spec.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
require "common/subjects"
RSpec.describe Common::Subjects::Subject do
def get_record(path)
MARC::XMLReader.new(path).first
end
let(:record) do
get_record("./spec/fixtures/unauthorized_immigrants.xml")
end
Expand Down
3 changes: 0 additions & 3 deletions umich_catalog_indexing/spec/common/subjects_spec.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
require "common/subjects"
require "marc"
RSpec.describe Common::Subjects do
def get_record(path)
MARC::XMLReader.new(path).first
end
let(:record) do
get_record("./spec/fixtures/unauthorized_immigrants.xml")
end
Expand Down
128 changes: 128 additions & 0 deletions umich_catalog_indexing/spec/fixtures/grudencz.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
<?xml version="1.0" encoding="UTF-8"?>
<collection>
<record>
<leader> ncm a22003131a 4500</leader>
<controlfield tag="005">20040421000000.0</controlfield>
<controlfield tag="008">990802s1993 pl moa ehi n lat d</controlfield>
<controlfield tag="001">990040063470106381</controlfield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(MiU)004006347MIU01</subfield>
</datafield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">8322430736</subfield>
<subfield code="a">8888888888</subfield>
</datafield>
<datafield tag="028" ind1="2" ind2="2">
<subfield code="a">PWM-8680</subfield>
<subfield code="b">Polsike Wydawn. Muzyczne</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(RLIN)MIUG99-C817</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(III)iiio31544307</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)ocm39819963</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">MH-Mu</subfield>
<subfield code="c">MH-Mu</subfield>
<subfield code="d">MiU</subfield>
</datafield>
<datafield tag="041" ind1="0" ind2=" ">
<subfield code="a">latcze</subfield>
<subfield code="g">czeenggerpol</subfield>
</datafield>
<datafield tag="100" ind1="1" ind2=" ">
<subfield code="a">Wilhelmi de Grudencz, Petrus,</subfield>
<subfield code="d">approximately 1400-approximately 1480.</subfield>
<subfield code="0">http://id.loc.gov/authorities/names/no95005776</subfield>
<subfield code="0">http://viaf.org/viaf/39567283</subfield>
</datafield>
<datafield tag="240" ind1="1" ind2="0">
<subfield code="a">Works.</subfield>
<subfield code="f">1993</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="0">
<subfield code="a">Opera musica /</subfield>
<subfield code="c">Petrus Wilhelmi de Grudencz, magister Cracoviensis ; edidit Jaromír CÌerný.</subfield>
</datafield>
<datafield tag="250" ind1=" " ind2=" ">
<subfield code="a">Wyd. 1.</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">Krakó w :</subfield>
<subfield code="b">Polskie Wydawn. Muzyczne,</subfield>
<subfield code="c">1993.</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">1 score (146 p.) :</subfield>
<subfield code="b">facsims. ;</subfield>
<subfield code="c">31 cm.</subfield>
</datafield>
<datafield tag="500" ind1=" " ind2=" ">
<subfield code="a">Vocal music for 1-5 voices.</subfield>
</datafield>
<datafield tag="546" ind1=" " ind2=" ">
<subfield code="a">Words in Latin and Czech.</subfield>
</datafield>
<datafield tag="546" ind1=" " ind2=" ">
<subfield code="a">Introd. in Polish, Czech and German; editorial notes in English.</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Vocal music.</subfield>
<subfield code="0">http://id.loc.gov/authorities/subjects/sh85144088</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Motets.</subfield>
<subfield code="0">http://id.loc.gov/authorities/subjects/sh85087515</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Äerný, Jaromí r,</subfield>
<subfield code="d">1939-</subfield>
<subfield code="0">http://id.loc.gov/authorities/names/n84100249</subfield>
<subfield code="0">http://viaf.org/viaf/52115348</subfield>
</datafield>
<datafield tag="998" ind1=" " ind2=" ">
<subfield code="c">LMH</subfield>
<subfield code="s">9114</subfield>
</datafield>
<datafield tag="958" ind1=" " ind2=" ">
<subfield code="a">MiU</subfield>
</datafield>
<datafield tag="959" ind1=" " ind2=" ">
<subfield code="a">(notis)ULBAM3997</subfield>
</datafield>
<datafield tag="995" ind1=" " ind2=" ">
<subfield code="a">20</subfield>
</datafield>
<datafield tag="BIB" ind1=" " ind2=" ">
<subfield code="u">2022-08-15 04:23:45 US/Eastern</subfield>
<subfield code="c">2021-06-21 18:24:57 US/Eastern</subfield>
<subfield code="s">false</subfield>
</datafield>
<datafield tag="852" ind1="0" ind2=" ">
<subfield code="b">MUSIC</subfield>
<subfield code="a">MiU</subfield>
<subfield code="c">NONE</subfield>
<subfield code="h">M 3 .P52 1993</subfield>
<subfield code="8">22975380140006381</subfield>
</datafield>
<datafield tag="974" ind1=" " ind2=" ">
<subfield code="8">22975380140006381</subfield>
<subfield code="f">1</subfield>
<subfield code="c">NONE</subfield>
<subfield code="m">SCORE</subfield>
<subfield code="a">39015040218748</subfield>
<subfield code="e">NONE</subfield>
<subfield code="u">mdp.39015040218748 ic 20201202</subfield>
<subfield code="7">23975380130006381</subfield>
<subfield code="p">08</subfield>
<subfield code="r">1998-12-16 05:59:00 US/Eastern</subfield>
<subfield code="h">M 3 .P52 1993</subfield>
<subfield code="d">MUSIC</subfield>
<subfield code="b">MUSIC</subfield>
</datafield>
</record>
</collection>
23 changes: 23 additions & 0 deletions umich_catalog_indexing/spec/indexers/common_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Runs the field rules from indexers/common.rb over a MARC fixture and checks
# the resulting output hash.
describe "indexers common" do
  let(:indexer) do
    Traject::Indexer.new do
      load_config_file("./spec/support/traject_settings.rb")
      load_config_file("./indexers/common.rb")
    end
  end

  # Memoized per example, so an example that edits the record in place does not
  # affect the others.
  let(:record) { get_record("spec/fixtures/grudencz.xml") }

  subject { indexer.process_record(record).output_hash }

  context "language" do
    it "gets language from 008 and 041" do
      expect(subject["language"]).to contain_exactly("Latin", "Czech")
    end

    it "ignores languages that have a subfield 2 in 041" do
      source_subfield = MARC::Subfield.new("2", "some_value")
      record["041"].append(source_subfield)
      expect(subject["language"]).to contain_exactly("Latin")
    end
  end
end
6 changes: 0 additions & 6 deletions umich_catalog_indexing/spec/indexers/umich_alma_spec.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
require "traject"
describe "umich_alma" do
def get_record(path)
reader = MARC::XMLReader.new(path)
for r in reader
return r
end
end
let(:hurdy_gurdy) do
get_record("./spec/fixtures/hurdy_gurdy.xml")
end
Expand Down
4 changes: 4 additions & 0 deletions umich_catalog_indexing/spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@
# Reads a file from ./spec/fixtures and returns its contents as a String.
#
# @param path [String] file name relative to ./spec/fixtures
# @return [String] the raw file contents
def fixture(path)
  fixture_file = "./spec/fixtures/#{path}"
  File.read(fixture_file)
end

# Returns the first record yielded by a MARC::XMLReader opened on the given path.
#
# @param path [String] location of a MARC-XML file
# @return the first record in the file
def get_record(path)
  reader = MARC::XMLReader.new(path)
  reader.first
end
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
require "traject"
require "umich_traject"
describe Traject::UMich::DigitalHolding do
def get_record(path)
reader = MARC::XMLReader.new(path)
for r in reader
return r
end
end
let(:arborist) do
get_record('./spec/fixtures/arborist_avd.xml')
get_record("./spec/fixtures/arborist_avd.xml")
end
let(:avd) do
arborist.fields("AVD").first
Expand Down Expand Up @@ -48,7 +42,7 @@ def get_record(path)
end
context "#to_h" do
it "returns expected hash" do
expect(subject.to_h).to eq(
expect(subject.to_h).to eq(
{
library: "ALMA_DIGITAL",
link: "https://umich-psb.alma.exlibrisgroup.com/discovery/delivery/01UMICH_INST:UMICH/121230624780006381",
Expand All @@ -61,4 +55,3 @@ def get_record(path)
end
end
end

Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
require "traject"
require "umich_traject"
describe Traject::UMich::ElectronicHolding do
def get_record(path)
MARC::XMLReader.new(path).first
end
let(:e_resource) do
get_record("./spec/fixtures/e_resource.xml")
end
Expand Down
26 changes: 10 additions & 16 deletions umich_catalog_indexing/spec/traject/umich/physical_holding_spec.rb
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
require "traject"
require "umich_traject"
describe Traject::UMich::PhysicalHolding do
def get_record(path)
reader = MARC::XMLReader.new(path)
for r in reader
return r
end
end
let(:arborist) do
get_record('./spec/fixtures/arborist.xml')
get_record("./spec/fixtures/arborist.xml")
end
let(:holding_id) { "22767949280006381" }
before(:each) do
@record = arborist
end
subject do
described_class.new(record: @record, holding_id: holding_id)
described_class.new(record: @record, holding_id: holding_id)
end
context "#institution_code" do
it "returns upcased institution code" do
Expand Down Expand Up @@ -65,7 +59,7 @@ def get_record(path)
when "c"
s.value = "GRAD"
when "h"
s.value = nil
s.value = nil
end
end
end
Expand All @@ -91,7 +85,7 @@ def get_record(path)
it "is true if any of the 974s have f=1" do
expect(subject.circulating?).to eq(true)
end
it "is false if none of the 974s have f = 1" do
it "is false if none of the 974s have f = 1" do
@record.fields("974").each do |f|
f.subfields.each do |s|
s.value = "0" if s.code == "f"
Expand All @@ -115,15 +109,15 @@ def get_record(path)
@record.fields("852").each do |f|
f.subfields.each do |s|
if s.code == "c"
s.value = nil
s.value = nil
end
end
end

@record.fields("974").each do |f|
f.subfields.each do |s|
if s.code == "c"
s.value = nil
s.value = nil
end
end
end
Expand All @@ -140,7 +134,7 @@ def get_record(path)
expect(subject.items.count).to eq(2)
end
it "doesn't include process type CA" do
@record["974"].append(MARC::Subfield.new("y","Process Status: CA"))
@record["974"].append(MARC::Subfield.new("y", "Process Status: CA"))
expect(subject.items.count).to eq(1)
end
end
Expand All @@ -152,7 +146,7 @@ def get_record(path)
s.value = "http://quod.lib.umich.edu/c/clementsead/umich-wcl-M-2015mit?view=text"
end
end
@record["856"].append(MARC::Subfield.new("y","Finding aid"))
@record["856"].append(MARC::Subfield.new("y", "Finding aid"))
expect(subject.finding_aid?).to eq(true)
end
it "returns false if there isn't a Finding aid" do
Expand All @@ -162,8 +156,8 @@ def get_record(path)
context "to_h" do
it "returns a hash with the expected keys" do
keys = [:callnumber, :display_name, :floor_location, :hol_mmsid,
:info_link, :items, :library, :location, :public_note,
:record_has_finding_aid, :summary_holdings]
:info_link, :items, :library, :location, :public_note,
:record_has_finding_aid, :summary_holdings]
expect(subject.to_h.keys.sort).to eq(keys)
end
end
Expand Down
Loading
Loading