Skip to content
This repository has been archived by the owner on Jun 27, 2020. It is now read-only.

Commit

Permalink
Merge branch 'release-3.3'
Browse files Browse the repository at this point in the history
  • Loading branch information
Jim Coble committed Jun 16, 2015
2 parents e153e19 + 8f81e48 commit 517d41f
Show file tree
Hide file tree
Showing 49 changed files with 1,156 additions and 35 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
# Environment configs
/config/environments/*.rb

# Ignore certain translation files
/config/locales/admin_set.yml

# SimpleCov / Coveralls
/coverage

Expand Down
7 changes: 5 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ source 'http://rubygems.org'
gem 'rails', '~> 4.1.6'
gem 'hydra-head', '~> 7.2.0'
gem 'ddr-alerts', '~> 1.0.0'
gem 'ddr-models', '~> 1.14.1'
gem 'ddr-models', '~> 1.16.1'
gem 'rubydora', '>= 1.8.1'
gem 'devise'
gem 'deprecation'
Expand All @@ -16,7 +16,7 @@ gem 'resque-pool', '0.3.0'
gem 'nest', '1.1.2'

# ExecJS runtime
gem 'therubyracer', '~> 0.11.3', :require => 'v8'
gem 'therubyracer', '~> 0.11.3', require: 'v8', group: :production

# For mapping file extensions to MIME types
gem 'mime-types', '~> 1.19'
Expand All @@ -25,6 +25,9 @@ gem 'mime-types', '~> 1.19'
gem 'rubyzip', '< 1.0.0'
gem 'paperclip', '~> 4.2.0'

# Filesystem representation
gem 'rubytree'

group :development, :test do
gem 'sqlite3'
gem 'rspec-rails', '~> 3.0.0'
Expand Down
15 changes: 10 additions & 5 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ GEM
rails (~> 4.1.6)
ddr-antivirus (1.3.2)
activesupport (~> 4.0)
ddr-models (1.14.1)
ddr-models (1.16.1)
active-fedora (~> 7.0)
ddr-antivirus (~> 1.3.1)
devise (~> 3.4)
Expand Down Expand Up @@ -131,7 +131,7 @@ GEM
rest-client (>= 1.6.1)
haml (4.0.6)
tilt
hashie (3.4.1)
hashie (3.4.2)
hike (1.2.3)
hooks (0.3.6)
uber (~> 0.0.4)
Expand Down Expand Up @@ -251,7 +251,7 @@ GEM
rake (>= 0.8.7)
thor (>= 0.18.1, < 2.0)
rake (10.4.2)
rdf (1.1.11)
rdf (1.1.12)
link_header (~> 0.0, >= 0.0.8)
rdf-aggregate-repo (1.1.0)
rdf (>= 1.1)
Expand Down Expand Up @@ -290,7 +290,7 @@ GEM
rdf-turtle (1.1.5)
ebnf (~> 0.3, >= 0.3.6)
rdf (~> 1.1, >= 1.1.4)
rdf-vocab (0.8.1)
rdf-vocab (0.8.3)
rdf (~> 1.1, >= 1.1.10)
rdf-xsd (1.1.2)
rdf (~> 1.1)
Expand Down Expand Up @@ -341,6 +341,9 @@ GEM
mime-types
nokogiri
rest-client
rubytree (0.9.4)
json (~> 1.8)
structured_warnings (~> 0.1)
rubyzip (0.9.9)
sass (3.2.19)
sass-rails (4.0.5)
Expand Down Expand Up @@ -385,6 +388,7 @@ GEM
sprockets (>= 2.8, < 4.0)
sqlite3 (1.3.10)
stomp (1.3.4)
structured_warnings (0.2.0)
sxp (0.1.5)
term-ansicolor (1.3.0)
tins (~> 1.0)
Expand Down Expand Up @@ -418,7 +422,7 @@ DEPENDENCIES
coveralls
database_cleaner
ddr-alerts (~> 1.0.0)
ddr-models (~> 1.14.1)
ddr-models (~> 1.16.1)
deprecation
devise
equivalent-xml
Expand All @@ -438,6 +442,7 @@ DEPENDENCIES
resque-pool (= 0.3.0)
rspec-rails (~> 3.0.0)
rubydora (>= 1.8.1)
rubytree
rubyzip (< 1.0.0)
sass-rails (~> 4.0.1)
sqlite3
Expand Down
31 changes: 31 additions & 0 deletions app/models/filesystem.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Wraps a Tree::TreeNode hierarchy that represents a directory tree and adds
# a few path helpers on top of it. Messages this class does not define itself
# are forwarded to the wrapped tree.
class Filesystem

  attr_accessor :tree

  # filepath - name given to the root node of the tree (defaults to '/').
  def initialize(filepath='/')
    @tree = Tree::TreeNode.new(filepath)
  end

  # Forwards every unknown message to the wrapped tree. The delegation is
  # deliberately unconditional so that anything the tree handles through its
  # own dynamic dispatch keeps working.
  def method_missing(sym, *args, &block)
    @tree.send(sym, *args, &block)
  end

  # Keeps respond_to? (and Object#method) consistent with the delegation done
  # in method_missing.
  def respond_to_missing?(sym, include_private = false)
    @tree.respond_to?(sym, include_private) || super
  end

  # Builds the path to +node+ from the names of its ancestors.
  #
  # node - an object responding to is_root?, name and parentage.
  # type - 'full' to include the root node's name; any other value yields a
  #        path relative to the root.
  #
  # Returns a String, or nil when a relative path is requested for the root.
  def self.path_to_node(node, type='full')
    full = type == 'full'
    return(full ? node.name : nil) if node.is_root?

    # parentage is reversed so the names run from the root downwards
    # (assumes the tree library lists the nearest ancestor first).
    ancestor_names = node.parentage.reverse.map(&:name)
    ancestor_names = ancestor_names.drop(1) unless full
    ancestor_names.empty? ? node.name : File.join(ancestor_names, node.name)
  end

  # Returns the root-relative path of +node+ (nil for the root itself).
  def self.node_locator(node)
    path_to_node(node, 'relative')
  end

  # True when every leaf of the tree reports a node_depth of exactly 2.
  def simple_ingest_filesystem?
    tree.each_leaf.all? { |leaf| leaf.node_depth == 2 }
  end

end
32 changes: 32 additions & 0 deletions app/models/simple_ingest_checksum.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Looks up checksums from a manifest file whose lines have the form
# "<checksum> <path>", where the path is expected to start with DATA_PREFIX.
class SimpleIngestChecksum

  attr_reader :checksum_filepath

  DATA_PREFIX = 'data'

  # checksum_filepath - path of the manifest file; it is not read until the
  #                     first lookup.
  def initialize(checksum_filepath)
    @checksum_filepath = checksum_filepath
    @checksum_hash = nil
  end

  # relative_filepath - file path relative to the data directory.
  #
  # Returns the checksum recorded for DATA_PREFIX/relative_filepath, or nil
  # when the manifest has no entry for it.
  def checksum(relative_filepath)
    checksums[File.join(DATA_PREFIX, relative_filepath)]
  end

  private

  # Manifest contents as a { path => checksum } Hash, read once and cached.
  def checksums
    @checksum_hash ||= load_checksums
  end

  # Parses the manifest file. Each line is split on the first run of
  # whitespace only, so paths that themselves contain spaces stay intact.
  # Blank lines and lines without a path are ignored.
  def load_checksums
    sums = {}
    File.foreach(checksum_filepath) do |line|
      sum, path = line.strip.split(/\s+/, 2)
      sums[path] = sum if path
    end
    sums
  end

end
95 changes: 95 additions & 0 deletions app/models/simple_ingest_metadata.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Reads a CSV metadata file and serves the metadata recorded for a given
# locator (a path relative to the data directory). The first column of the
# file holds the locator; the remaining columns are metadata terms.
class SimpleIngestMetadata

  attr_reader :metadata_filepath, :metadata_profile

  DATA_PREFIX = 'data'

  # Term names as supplied by the vocabulary; used to map a normalized
  # heading back to its canonical spelling.
  TERM_NAMES = Ddr::Vocab::Vocabulary.term_names(RDF::DC)

  # Used in accommodation of case and spacing errors in column headings
  NORMALIZED_TERMS = TERM_NAMES.map(&:downcase).map(&:to_s)

  # Lower-cases +header+ and removes all whitespace. A nil header is treated
  # as an empty string so it is reported as invalid instead of raising
  # NoMethodError.
  def self.normalize_header(header)
    header.to_s.downcase.gsub(/\s+/, "")
  end

  # Returns the canonical term name (as a String) matching +header+ once case
  # and spacing differences are ignored, or +header+ unchanged when no term
  # matches.
  def self.canonicalize_header(header)
    position = NORMALIZED_TERMS.index(normalize_header(header))
    position ? TERM_NAMES[position].to_s : header
  end

  # Accommodate case and spacing errors in column headings.
  # NOTE: this registers a converter in the process-wide CSV::HeaderConverters
  # table when the class is loaded.
  CSV::HeaderConverters[:canonicalize] = lambda { |h| canonicalize_header(h) }

  # metadata_filepath - path of the CSV metadata file.
  # metadata_profile  - Hash read for :csv (options handed to CSV.read) and
  #                     :parse (:repeating_fields_separator and
  #                     :repeatable_fields).
  #
  # Raises ArgumentError when a column heading other than the first is not a
  # recognized metadata term.
  def initialize(metadata_filepath, metadata_profile)
    @metadata_filepath = metadata_filepath
    @metadata_profile = metadata_profile
    validate_headers
  end

  # locator - path relative to the data directory; blank selects the row for
  #           the data directory itself.
  #
  # Returns a Hash of heading => Array of values for the matching row, or an
  # empty Hash when the file has no row for the locator. Blank cells are
  # omitted.
  def metadata(locator)
    loc = locator.present? ? File.join(DATA_PREFIX, locator) : DATA_PREFIX
    row = metadata_grid[loc]
    return {} unless row
    row.each_with_object({}) do |(heading, field_contents), collected|
      next if field_contents.blank?
      add_field_to_metadata(collected, heading, field_contents)
    end
  end

  private

  # Appends the parsed values of one cell to metadata[heading].
  def add_field_to_metadata(metadata, heading, field_contents)
    metadata[heading] ||= []
    metadata[heading] += Array(parse_field_contents(heading, field_contents))
    metadata
  end

  # Splits the cell on the configured separator when the heading is listed as
  # repeatable; otherwise returns the cell untouched.
  def parse_field_contents(heading, field_contents)
    if field_contents && repeatable_fields.include?(heading)
      field_contents.split(repeating_fields_separator).map(&:strip)
    else
      field_contents
    end
  end

  # Checks every heading after the first (locator) column. drop(1) keeps this
  # safe when the file has no header fields at all.
  def validate_headers
    invalid_headers = as_csv_table.headers.drop(1).reject { |header| valid_header?(header) }
    return if invalid_headers.empty?
    raise ArgumentError, "Invalid metadata terms in header row: #{invalid_headers.join(', ')}"
  end

  def valid_header?(header)
    NORMALIZED_TERMS.include?(normalize_header(header))
  end

  def normalize_header(header)
    self.class.normalize_header(header)
  end

  # Rows of the CSV file keyed by locator (trailing slash removed), with the
  # locator column itself deleted from each row. Built once and cached. Rows
  # without a locator are skipped: they cannot be matched to any path.
  def metadata_grid
    @metadata_grid ||= as_csv_table.each_with_object({}) do |row, grid|
      locator = row.field(0)
      next if locator.blank?
      row.delete(0)
      grid[locator.sub(/\/$/, "")] = row # remove trailing slash if present
    end
  end

  def as_csv_table
    @csv_table ||= CSV.read(metadata_filepath, metadata_profile[:csv])
  end

  def repeating_fields_separator
    metadata_profile[:parse][:repeating_fields_separator]
  end

  def repeatable_fields
    metadata_profile[:parse][:repeatable_fields]
  end

end
102 changes: 102 additions & 0 deletions app/services/build_batch_from_folder_ingest.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Service object that walks a filesystem tree and records one ingest batch
# object per node, together with its relationships, descriptive metadata and
# (for components) its content datastream.
class BuildBatchFromFolderIngest

  attr_reader :user, :filesystem, :content_modeler, :metadata_provider, :checksum_provider, :batch_name, :batch_description
  attr_accessor :batch, :collection_pid

  # user              - owner of the batch.
  # filesystem        - enumerable of tree nodes (each node responds to
  #                     parent, content and content=).
  # content_modeler   - class whose instances, built from a node, respond to
  #                     call and return the model name for that node.
  # metadata_provider - responds to metadata(locator).
  # checksum_provider - responds to checksum(relative_filepath).
  # batch_name, batch_description - optional labels stored on the batch.
  def initialize(user, filesystem, content_modeler, metadata_provider, checksum_provider, batch_name=nil, batch_description=nil)
    @user, @filesystem, @content_modeler = user, filesystem, content_modeler
    @metadata_provider, @checksum_provider = metadata_provider, checksum_provider
    @batch_name, @batch_description = batch_name, batch_description
  end

  # Creates the batch, adds an object for every filesystem node, marks the
  # batch as ready and returns it.
  def call
    self.batch = create_batch
    traverse_filesystem
    batch.update_attributes(status: DulHydra::Batch::Models::Batch::STATUS_READY)
    batch
  end

  private

  def create_batch
    DulHydra::Batch::Models::Batch.create(user: user, name: batch_name, description: batch_description)
  end

  def traverse_filesystem
    filesystem.each { |node| create_object(node) }
  end

  # Builds the batch object for one node. Only collections and items get a
  # pid minted up front; the collection's pid is remembered because every
  # object's admin policy relationship points at it.
  def create_object(node)
    model_name = content_modeler.new(node).call
    pid = %w(Collection Item).include?(model_name) ? assign_pid(node) : nil
    self.collection_pid = pid if model_name == 'Collection'
    ingest_object = DulHydra::Batch::Models::IngestBatchObject.create(batch: batch, model: model_name, pid: pid)
    add_relationships(ingest_object, node.parent)
    add_metadata(ingest_object, node)
    add_content_datastream(ingest_object, node) if model_name == 'Component'
  end

  # Mints a pid, stores it in the node's content hash (so child nodes can
  # reference it as their parent) and returns it.
  def assign_pid(node)
    node_content = (node.content ||= {})
    node_content[:pid] = ActiveFedora::Base.connection_for_pid('0').mint
  end

  # Every object gets an admin policy relationship to the collection; items
  # and components additionally get a parent relationship to the pid stored
  # on their parent node.
  def add_relationships(batch_object, parent_node)
    batch_object.batch_object_relationships <<
      create_relationship(DulHydra::Batch::Models::BatchObjectRelationship::RELATIONSHIP_ADMIN_POLICY, collection_pid)
    return unless %w(Item Component).include?(batch_object.model)
    batch_object.batch_object_relationships <<
      create_relationship(DulHydra::Batch::Models::BatchObjectRelationship::RELATIONSHIP_PARENT, parent_node.content[:pid])
  end

  # Records one descriptive-metadata attribute per value supplied by the
  # metadata provider for this node's locator.
  def add_metadata(batch_object, node)
    node_metadata = metadata_provider.metadata(Filesystem.node_locator(node))
    node_metadata.each do |term, values|
      Array(values).each do |single_value|
        DulHydra::Batch::Models::BatchObjectAttribute.create(
          batch_object: batch_object,
          datastream: Ddr::Datastreams::DESC_METADATA,
          name: term,
          operation: DulHydra::Batch::Models::BatchObjectAttribute::OPERATION_ADD,
          value: single_value,
          value_type: DulHydra::Batch::Models::BatchObjectAttribute::VALUE_TYPE_STRING
        )
      end
    end
  end

  # Attaches a content datastream whose payload is the node's full file path
  # and whose checksum is looked up by the node's relative path.
  def add_content_datastream(batch_object, node)
    absolute_path = Filesystem.path_to_node(node)
    relative_path = Filesystem.path_to_node(node, 'relative')
    datastream = DulHydra::Batch::Models::BatchObjectDatastream.create(
      name: Ddr::Datastreams::CONTENT,
      operation: DulHydra::Batch::Models::BatchObjectDatastream::OPERATION_ADD,
      payload: absolute_path,
      payload_type: DulHydra::Batch::Models::BatchObjectDatastream::PAYLOAD_TYPE_FILENAME,
      checksum: checksum_provider.checksum(relative_path),
      checksum_type: Ddr::Datastreams::CHECKSUM_TYPE_SHA256
    )
    batch_object.batch_object_datastreams << datastream
  end

  def create_relationship(relationship_name, relationship_target_pid)
    DulHydra::Batch::Models::BatchObjectRelationship.create(
      name: relationship_name,
      operation: DulHydra::Batch::Models::BatchObjectRelationship::OPERATION_ADD,
      object: relationship_target_pid,
      object_type: DulHydra::Batch::Models::BatchObjectRelationship::OBJECT_TYPE_PID
    )
  end

end
Loading

0 comments on commit 517d41f

Please sign in to comment.