From 188d20affc1a6dbda086506313ca567955a281ef Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 28 Feb 2024 16:10:45 -0500 Subject: [PATCH 1/2] DEV-838: Full MARC record output - marc21_full metadata format with full records (save the special Zephir-specific fields) - additional description element for Identify with reference to metadata sharing policy, access & use policy --- config/extra_description.xml | 18 ++++++++++ lib/oai_solr/marc21_full.rb | 26 ++++++++++++++ lib/oai_solr/provider.rb | 3 ++ spec/oai_solr_marc21_full_spec.rb | 59 +++++++++++++++++++++++++++++++ spec/oai_solr_marc21_spec.rb | 4 --- spec/oai_solr_spec.rb | 19 ++++++++++ 6 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 config/extra_description.xml create mode 100644 lib/oai_solr/marc21_full.rb create mode 100644 spec/oai_solr_marc21_full_spec.rb diff --git a/config/extra_description.xml b/config/extra_description.xml new file mode 100644 index 0000000..045917b --- /dev/null +++ b/config/extra_description.xml @@ -0,0 +1,18 @@ + + + + https://www.hathitrust.org/member-libraries/resources-for-librarians/data-resources/oai-feed/ + The Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH) is a protocol used in libraries and archives for the automated delivery of structured bibliographic metadata. You can use this option to retrieve metadata in MARC21 or unqualified Dublin Core formats in XML structure. The OAI feed allows you to access new and updated records and (for the full set of records) discover if any have been deleted. For best practices related to OAI, and a list of potential harvesters, see https://www.ideals.illinois.edu/items/50369. + + + https://www.hathitrust.org/the-collection/terms-conditions/metadata-sharing-and-use-policy/#bibliographic-metadata-sharing-policy + Metadata is provided under the terms of the HathiTrust Bibliographic Metadata Sharing Policy. See details at the above URL. + + + https://www.hathitrust.org/the-collection/search-access/access-use-policy/ + HathiTrust is a collaborative library initiative. Users are encouraged to cite and link to digital content and are free to do so without asking for permission. Depending on the source of the digitized work, licenses or other contractual terms may restrict further distribution or other uses. For volume-specific information, please consult the <dc:rights> element (oai_dc), 856$r (marc21), or 974$r (marc21_full). You need to make your own assessment of the copyright or other legal concerns related to uses beyond those provided by HathiTrust for particular works. + +The possible Access and Use statements that apply to each book are listed at the URL above. + + + diff --git a/lib/oai_solr/marc21_full.rb b/lib/oai_solr/marc21_full.rb new file mode 100644 index 0000000..a693521 --- /dev/null +++ b/lib/oai_solr/marc21_full.rb @@ -0,0 +1,26 @@ +require "marc" +require "oai" + +module OAISolr + class Marc21Full + ZEPHIR_FIELDS = %w[DAT CAT CID HOL FMT].to_set + + def prefix + "marc21_full" + end + + def schema + "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" + end + + def namespace + "http://www.loc.gov/MARC21/slim" + end + + def encode _, record + record.marc_record.tap do |r| + r.fields.reject! { |f| ZEPHIR_FIELDS.include?(f.tag) } + end.to_xml_string(fast_but_unsafe: true, include_namespace: true) + end + end +end diff --git a/lib/oai_solr/provider.rb b/lib/oai_solr/provider.rb index 5649eef..98ef0b5 100644 --- a/lib/oai_solr/provider.rb +++ b/lib/oai_solr/provider.rb @@ -1,6 +1,7 @@ require "oai" require "oai_solr/model" require "oai_solr/marc21" +require "oai_solr/marc21_full" require "oai_solr/dublin_core" module OAISolr @@ -11,8 +12,10 @@ class Provider < OAI::Provider::Base admin_email Settings.admin_email source_model OAISolr::Model.new register_format OAISolr::Marc21.new + register_format OAISolr::Marc21Full.new register_format OAISolr::DublinCore.instance sample_id Settings.sample_identifier update_granularity OAI::Const::Granularity::LOW + extra_description File.read("config/extra_description.xml") end end diff --git a/spec/oai_solr_marc21_full_spec.rb b/spec/oai_solr_marc21_full_spec.rb new file mode 100644 index 0000000..ad546cb --- /dev/null +++ b/spec/oai_solr_marc21_full_spec.rb @@ -0,0 +1,59 @@ +require "spec_helper" +require "oai_solr/record" +require "oai_solr/marc21" +require "json" +require "nokogiri" + +RSpec.describe OAISolr::Marc21Full do + shared_examples_for "full marc record" do |file| + let(:sdoc) { JSON.parse(File.read("spec/data/#{file}")) } + let(:oai_record) { OAISolr::Record.new(sdoc) } + let(:full_marc_xml) { described_class.new.encode(nil, oai_record) } + let(:full_marc_record) { MARC::XMLReader.new(StringIO.new(full_marc_xml)).first } + let(:slim_schema) do + Nokogiri::XML::Schema(File.open(File.dirname(__FILE__) + "/schemas/MARC21slim.xsd")) + end + + describe "#encode" do + it "provides valid marc for #{file}" do + parsed = Nokogiri::XML::Document.parse(full_marc_xml) + expect(slim_schema.valid?(parsed)).to be true + end + + it "has 974s for #{file}" do + orig = oai_record.marc_record + expect(orig.fields("974").count).to be > 0 + expect(orig.fields("974").count).to eq(full_marc_record.fields("974").count) + end + + it "has an 008 for #{file}" do + expect(full_marc_record["008"]).not_to be(nil) + end + + it "does not have special zephir fields" do + %w[CID DAT CAT FMT HOL].each do |zephir_field| + expect(full_marc_record[zephir_field]).to be nil + end + end + + it "has a title field" do + expect(full_marc_record["245"].count).to be > 0 + end + + it "has a subject field" do + expect(full_marc_record["650"].count).to be > 0 + end + + # true for the two sample records below, not necessarily + # always these indicators! + it "has indicators for title field" do + f = full_marc_record["245"] + expect(f.indicator1).to eq("1") + expect(f.indicator2).to eq("0") + end + end + end + + it_behaves_like "full marc record", "000004150.json" + it_behaves_like "full marc record", "000007599.json" +end diff --git a/spec/oai_solr_marc21_spec.rb b/spec/oai_solr_marc21_spec.rb index dfbdae4..cbe5c55 100644 --- a/spec/oai_solr_marc21_spec.rb +++ b/spec/oai_solr_marc21_spec.rb @@ -18,10 +18,6 @@ slimmed = marc21.slim_marc(rec.marc_record) parsed = Nokogiri::XML::Document.parse(slimmed.to_xml.to_s) expect(slim_schema.valid?(parsed)).to be true - # valid? is missing from the MARC gem, but it only checks for - # ControlField/DataField discrepancies anyway - # expect(rec.marc_record.valid?).to be true - # expect(marc21.slim_marc(rec.marc_record).valid?).to be true end it "replaces the 974s with 856s for #{file}" do diff --git a/spec/oai_solr_spec.rb b/spec/oai_solr_spec.rb index 397ccf7..0362dca 100644 --- a/spec/oai_solr_spec.rb +++ b/spec/oai_solr_spec.rb @@ -62,6 +62,10 @@ def doc describe "Identify" do before(:each) { get oai_endpoint, verb: "Identify" } it_behaves_like "valid oai response" + + it "references metadata policy" do + expect(last_response.body).to include("/metadata-sharing-and-use-policy") + end end describe "ListMetadataFormats" do @@ -75,6 +79,10 @@ def doc it "claims to support marc21" do expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).to include("marc21") end + + it "claims to support marc21_full" do + expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).to include("marc21_full") + end end describe "ListSets" do @@ -286,6 +294,17 @@ def doc end end + describe "GetRecord full MARC" do + before(:each) { get oai_endpoint, verb: "GetRecord", metadataPrefix: "marc21_full", identifier: existing_record["id"] } + let(:response_record) { MARC::XMLReader.new(StringIO.new(last_response.body)).first } + + it_behaves_like "valid oai response" + + it "can get a record as MARC" do + expect(response_record.leader).to match(/[\dA-Za-z ]{23}/) + end + end + describe "GetRecord with nonexistent identifier" do it "returns oai idDoesNotExist error" do get oai_endpoint, verb: "GetRecord", metadataPrefix: "oai_dc", identifier: "nonexistent" From e6b3651fc23a60b492a12da905488feb5cee704d Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Thu, 23 May 2024 12:19:55 -0400 Subject: [PATCH 2/2] DEV-838: Disable full marc output * Leaves unit tests for MARC21 full output enabled * Removes it from provider configuration * Integration tests check that MARC21 full output is not enabled --- config/extra_description.xml | 2 +- lib/oai_solr/provider.rb | 7 +++++-- spec/oai_solr_marc21_full_spec.rb | 2 +- spec/oai_solr_spec.rb | 18 ++++++++++++++---- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/config/extra_description.xml b/config/extra_description.xml index 045917b..c29f354 100644 --- a/config/extra_description.xml +++ b/config/extra_description.xml @@ -10,7 +10,7 @@ https://www.hathitrust.org/the-collection/search-access/access-use-policy/ - HathiTrust is a collaborative library initiative. Users are encouraged to cite and link to digital content and are free to do so without asking for permission. Depending on the source of the digitized work, licenses or other contractual terms may restrict further distribution or other uses. For volume-specific information, please consult the <dc:rights> element (oai_dc), 856$r (marc21), or 974$r (marc21_full). You need to make your own assessment of the copyright or other legal concerns related to uses beyond those provided by HathiTrust for particular works. + HathiTrust is a collaborative library initiative. Users are encouraged to cite and link to digital content and are free to do so without asking for permission. Depending on the source of the digitized work, licenses or other contractual terms may restrict further distribution or other uses. For volume-specific information, please consult the <dc:rights> element (oai_dc) or 856$r (marc21). You need to make your own assessment of the copyright or other legal concerns related to uses beyond those provided by HathiTrust for particular works. The possible Access and Use statements that apply to each book are listed at the URL above. diff --git a/lib/oai_solr/provider.rb b/lib/oai_solr/provider.rb index 98ef0b5..12f32c6 100644 --- a/lib/oai_solr/provider.rb +++ b/lib/oai_solr/provider.rb @@ -1,7 +1,9 @@ require "oai" +require "oai_solr/settings" require "oai_solr/model" require "oai_solr/marc21" -require "oai_solr/marc21_full" +# Not currently enabled +# require "oai_solr/marc21_full" require "oai_solr/dublin_core" module OAISolr @@ -12,7 +14,8 @@ class Provider < OAI::Provider::Base admin_email Settings.admin_email source_model OAISolr::Model.new register_format OAISolr::Marc21.new - register_format OAISolr::Marc21Full.new + # Not currently enabled + # register_format OAISolr::Marc21Full.new register_format OAISolr::DublinCore.instance sample_id Settings.sample_identifier update_granularity OAI::Const::Granularity::LOW diff --git a/spec/oai_solr_marc21_full_spec.rb b/spec/oai_solr_marc21_full_spec.rb index ad546cb..89699ac 100644 --- a/spec/oai_solr_marc21_full_spec.rb +++ b/spec/oai_solr_marc21_full_spec.rb @@ -1,6 +1,6 @@ require "spec_helper" require "oai_solr/record" -require "oai_solr/marc21" +require "oai_solr/marc21_full" require "json" require "nokogiri" diff --git a/spec/oai_solr_spec.rb b/spec/oai_solr_spec.rb index 0362dca..a7d4788 100644 --- a/spec/oai_solr_spec.rb +++ b/spec/oai_solr_spec.rb @@ -80,9 +80,13 @@ def doc expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).to include("marc21") end - it "claims to support marc21_full" do + xit "includes marc21_full (currently disabled)" do expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).to include("marc21_full") end + + it "does not include marc21_full" do + expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).not_to include("marc21_full") + end end describe "ListSets" do @@ -298,10 +302,16 @@ def doc before(:each) { get oai_endpoint, verb: "GetRecord", metadataPrefix: "marc21_full", identifier: existing_record["id"] } let(:response_record) { MARC::XMLReader.new(StringIO.new(last_response.body)).first } - it_behaves_like "valid oai response" + xcontext "when enabled (currently disabled)" do + it_behaves_like "valid oai response" - it "can get a record as MARC" do - expect(response_record.leader).to match(/[\dA-Za-z ]{23}/) + it "can get a record as MARC" do + expect(response_record.leader).to match(/[\dA-Za-z ]{23}/) + end + end + + it "returns an error" do + expect(doc.xpath("count(//xmlns:error[@code='cannotDisseminateFormat'])")).to eq(1) end end