This repository has been archived by the owner on Jun 27, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
49 changed files
with
1,156 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Wraps a Tree::TreeNode rooted at a file path and forwards every message it
# does not define itself to that tree.
class Filesystem
  # The wrapped tree; writable so callers can swap in a different tree.
  attr_accessor :tree

  # @param filepath [String] name given to the root node of the tree
  def initialize(filepath = '/')
    @tree = Tree::TreeNode.new(filepath)
  end

  # Forwards any unknown message (with its arguments and block) to the
  # wrapped tree.
  def method_missing(sym, *args, &block)
    @tree.send(sym, *args, &block)
  end

  # Keeps +respond_to?+ and +method+ consistent with the forwarding done in
  # +method_missing+.
  def respond_to_missing?(sym, include_private = false)
    @tree.respond_to?(sym, include_private) || super
  end

  # Builds a path string for +node+ from the names of its ancestors.
  #
  # @param node [#is_root?, #name, #parentage] the node to locate
  # @param type [String] 'full' includes the root node's name; any other
  #   value yields the path relative to the root
  # @return [String, nil] the path, or nil for the root node when a
  #   relative path is requested
  def self.path_to_node(node, type = 'full')
    full = type == 'full'
    return (full ? node.name : nil) if node.is_root?

    # parentage is used nearest-ancestor first; reverse it so the root leads.
    ancestor_names = node.parentage.reverse.map(&:name)
    ancestor_names = ancestor_names.drop(1) unless full # drop the root's name
    ancestor_names.empty? ? node.name : File.join(ancestor_names, node.name)
  end

  # @return [String, nil] the path of +node+ relative to the root
  def self.node_locator(node)
    path_to_node(node, 'relative')
  end

  # @return [Boolean] true when every leaf of the tree reports a
  #   +node_depth+ of exactly 2
  def simple_ingest_filesystem?
    tree.each_leaf.all? { |leaf| leaf.node_depth == 2 }
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Looks up checksums from a manifest file whose lines have the form
# "<checksum> <path>", where each path is expected to start with DATA_PREFIX.
class SimpleIngestChecksum
  attr_reader :checksum_filepath

  DATA_PREFIX = 'data'

  # @param checksum_filepath [String] path to the checksum manifest; the
  #   file is not read until the first lookup
  def initialize(checksum_filepath)
    @checksum_filepath = checksum_filepath
    @checksum_hash = nil
  end

  # @param relative_filepath [String] file path relative to DATA_PREFIX
  # @return [String, nil] the checksum recorded for that file, or nil when
  #   the manifest has no entry for it
  def checksum(relative_filepath)
    checksums[File.join(DATA_PREFIX, relative_filepath)]
  end

  private

  # Parsed manifest, loaded once on first use (an empty manifest is cached
  # too rather than being re-read on every call).
  def checksums
    @checksum_hash ||= load_checksums
  end

  # Reads the manifest into a { path => checksum } hash.
  def load_checksums
    File.foreach(checksum_filepath).each_with_object({}) do |line, sums|
      # Split on the first run of whitespace only, so paths that themselves
      # contain spaces are kept intact.
      sum, path = line.strip.split(/\s+/, 2)
      next if path.nil? # blank or malformed line

      sums[path] = sum
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# Reads a CSV metadata file whose first column is a locator (a path starting
# with DATA_PREFIX) and whose remaining columns are metadata terms, and
# returns the metadata recorded for a given locator.
class SimpleIngestMetadata
  attr_reader :metadata_filepath, :metadata_profile

  DATA_PREFIX = 'data'

  # Used in accommodation of case and spacing errors in column headings
  NORMALIZED_TERMS = Ddr::Vocab::Vocabulary.term_names(RDF::DC).map(&:downcase).map(&:to_s).freeze

  # @param metadata_filepath [String] path to the CSV metadata file
  # @param metadata_profile [Hash] must provide :csv (options handed to
  #   CSV.read) and :parse (with :repeating_fields_separator and
  #   :repeatable_fields)
  # @raise [ArgumentError] when a column heading is not a recognized term
  def initialize(metadata_filepath, metadata_profile)
    @metadata_filepath = metadata_filepath
    @metadata_profile = metadata_profile
    validate_headers
  end

  # @param locator [String, nil] path relative to DATA_PREFIX; a blank value
  #   selects the DATA_PREFIX row itself
  # @return [Hash{String => Array}] non-blank field values keyed by heading;
  #   empty when the file has no row for the locator
  def metadata(locator)
    metadata = {}
    loc = locator.present? ? File.join(DATA_PREFIX, locator) : DATA_PREFIX
    row = metadata_grid[loc]
    return metadata unless row

    row.each do |heading, field_contents|
      next if field_contents.blank?

      add_field_to_metadata(metadata, heading, field_contents)
    end
    metadata
  end

  private

  # Appends the parsed value(s) of one field to metadata[heading].
  def add_field_to_metadata(metadata, heading, field_contents)
    metadata[heading] ||= []
    metadata[heading] += Array(parse_field_contents(heading, field_contents))
    metadata
  end

  # Splits the contents of repeatable fields on the profile's separator;
  # other fields are returned untouched.
  def parse_field_contents(heading, field_contents)
    if field_contents && repeatable_fields.include?(heading)
      field_contents.split(repeating_fields_separator).map(&:strip)
    else
      field_contents
    end
  end

  # Checks every heading after the first (locator) column.
  def validate_headers
    # drop(1) rather than [1..-1] so an empty header list yields [] not nil.
    invalid_headers = as_csv_table.headers.drop(1).reject { |header| valid_header?(header) }
    return if invalid_headers.empty?

    raise ArgumentError, "Invalid metadata terms in header row: #{invalid_headers.join(', ')}"
  end

  def valid_header?(header)
    NORMALIZED_TERMS.include?(normalize_header(header))
  end

  # to_s so a missing (nil) heading is reported as invalid instead of
  # raising NoMethodError.
  def normalize_header(header)
    header.to_s.downcase.gsub(/\s+/, "")
  end

  # Rows keyed by locator (trailing slash removed), with the locator column
  # itself deleted from each row.
  def metadata_grid
    @metadata_grid ||= as_csv_table.each_with_object({}) do |row, grid|
      locator = row.field(0)
      next if locator.nil? # a row without a locator cannot be looked up

      row.delete(0)
      grid[locator.sub(/\/$/, "")] = row # remove trailing slash if present
    end
  end

  def as_csv_table
    # Options are splatted as keywords: a positional options hash is
    # rejected by CSV.read once keyword arguments are separated (Ruby 3).
    @csv_table ||= CSV.read(metadata_filepath, **(metadata_profile[:csv] || {}))
  end

  def repeating_fields_separator
    metadata_profile[:parse][:repeating_fields_separator]
  end

  def repeatable_fields
    metadata_profile[:parse][:repeatable_fields]
  end

  # Accommodate case and spacing errors in column headings
  CSV::HeaderConverters[:canonicalize] = lambda { |h|
    term_index = NORMALIZED_TERMS.index(h.downcase.gsub(/\s+/, ""))
    term_index ? Ddr::Vocab::Vocabulary.term_names(RDF::DC)[term_index].to_s : h
  }
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# Builds a DulHydra batch from a folder tree: one ingest batch object per
# filesystem node, with its relationships, descriptive metadata and (for
# components) a content datastream.
class BuildBatchFromFolderIngest
  attr_reader :user, :filesystem, :content_modeler, :metadata_provider, :checksum_provider, :batch_name,
              :batch_description
  attr_accessor :batch, :collection_pid

  # @param user the user recorded on the created batch
  # @param filesystem [#each] yields the nodes to ingest
  # @param content_modeler [#new] +content_modeler.new(node).call+ returns
  #   the model name for a node ('Collection', 'Item', 'Component', ...)
  # @param metadata_provider [#metadata] returns a hash of metadata for a
  #   node locator
  # @param checksum_provider [#checksum] returns the checksum for a
  #   relative file path
  # @param batch_name [String, nil]
  # @param batch_description [String, nil]
  def initialize(user, filesystem, content_modeler, metadata_provider, checksum_provider, batch_name = nil,
                 batch_description = nil)
    @user = user
    @filesystem = filesystem
    @content_modeler = content_modeler
    @metadata_provider = metadata_provider
    @checksum_provider = checksum_provider
    @batch_name = batch_name
    @batch_description = batch_description
  end

  # Creates the batch, adds an object for every filesystem node, then marks
  # the batch as ready.
  #
  # @return the created batch
  def call
    @batch = create_batch
    traverse_filesystem
    batch.update_attributes(status: DulHydra::Batch::Models::Batch::STATUS_READY)
    batch
  end

  private

  def create_batch
    DulHydra::Batch::Models::Batch.create(user: user, name: batch_name, description: batch_description)
  end

  def traverse_filesystem
    filesystem.each { |node| create_object(node) }
  end

  # Creates the batch object for one node. Only collections and items get a
  # PID minted here; the collection's PID is remembered because every object
  # uses it as its admin policy target.
  def create_object(node)
    object_model = content_modeler.new(node).call
    pid = assign_pid(node) if %w[Collection Item].include?(object_model)
    self.collection_pid = pid if object_model == 'Collection'
    batch_object = DulHydra::Batch::Models::IngestBatchObject.create(batch: batch, model: object_model, pid: pid)
    add_relationships(batch_object, node.parent)
    add_metadata(batch_object, node)
    add_content_datastream(batch_object, node) if object_model == 'Component'
  end

  # Mints a PID and stores it in the node's content hash so that child
  # nodes can read it when building their parent relationship.
  #
  # @return the minted PID
  def assign_pid(node)
    node.content ||= {}
    node.content[:pid] = ActiveFedora::Base.connection_for_pid('0').mint
  end

  # Every object gets an admin policy relationship to the collection; items
  # and components additionally point at their parent node's PID.
  def add_relationships(batch_object, parent_node)
    relationship = DulHydra::Batch::Models::BatchObjectRelationship
    batch_object.batch_object_relationships <<
      create_relationship(relationship::RELATIONSHIP_ADMIN_POLICY, collection_pid)
    return unless %w[Item Component].include?(batch_object.model)

    batch_object.batch_object_relationships <<
      create_relationship(relationship::RELATIONSHIP_PARENT, parent_node.content[:pid])
  end

  # Creates one batch object attribute per metadata value of the node.
  def add_metadata(batch_object, node)
    locator = Filesystem.node_locator(node)
    metadata_provider.metadata(locator).each do |key, value|
      Array(value).each do |v|
        DulHydra::Batch::Models::BatchObjectAttribute.create(
          batch_object: batch_object,
          datastream: Ddr::Datastreams::DESC_METADATA,
          name: key,
          operation: DulHydra::Batch::Models::BatchObjectAttribute::OPERATION_ADD,
          value: v,
          value_type: DulHydra::Batch::Models::BatchObjectAttribute::VALUE_TYPE_STRING
        )
      end
    end
  end

  # Attaches a content datastream whose payload is the node's full file
  # path and whose checksum is looked up by the node's relative path.
  def add_content_datastream(batch_object, node)
    full_filepath = Filesystem.path_to_node(node)
    rel_filepath = Filesystem.path_to_node(node, 'relative')
    ds = DulHydra::Batch::Models::BatchObjectDatastream.create(
      name: Ddr::Datastreams::CONTENT,
      operation: DulHydra::Batch::Models::BatchObjectDatastream::OPERATION_ADD,
      payload: full_filepath,
      payload_type: DulHydra::Batch::Models::BatchObjectDatastream::PAYLOAD_TYPE_FILENAME,
      checksum: checksum_provider.checksum(rel_filepath),
      checksum_type: Ddr::Datastreams::CHECKSUM_TYPE_SHA256
    )
    batch_object.batch_object_datastreams << ds
  end

  def create_relationship(relationship_name, relationship_target_pid)
    DulHydra::Batch::Models::BatchObjectRelationship.create(
      name: relationship_name,
      operation: DulHydra::Batch::Models::BatchObjectRelationship::OPERATION_ADD,
      object: relationship_target_pid,
      object_type: DulHydra::Batch::Models::BatchObjectRelationship::OBJECT_TYPE_PID
    )
  end
end
Oops, something went wrong.