diff --git a/config/initializers/bulkrax.rb b/config/initializers/bulkrax.rb index 412a3cd..e53a3fe 100644 --- a/config/initializers/bulkrax.rb +++ b/config/initializers/bulkrax.rb @@ -2,125 +2,124 @@ # Ensure Knapsack version gets loaded after Hyku's bulkrax.rb Rails.application.config.after_initialize do - if ENV.fetch('HYKU_BULKRAX_ENABLED', 'true') == 'true' - Bulkrax.setup do |config| - ## - # By default this is the first registered curation concern. But based on tests and past - # configs, this should be 'GenericWork'. Note: The below value could change, but it should be - # explicit. - # - # See https://github.com/samvera/hyku/blob/07fde572f9152d513b13f71cae90dd4fdfbfba6c/config/initializers/hyrax.rb#L200-L202 - config.default_work_type = 'GenericWork' - - # Setting the available parsers for Adventist. - config.parsers = [ - { name: "OAI - Adventist Digital Library", class_name: "Bulkrax::OaiAdventistQdcParser", partial: "oai_adventist_fields" }, - { name: "CSV - Comma Separated Values", class_name: "Bulkrax::CsvParser", partial: "csv_fields" }, - ] - - # Should Bulkrax make up source identifiers for you? This allow round tripping - # and download errored entries to still work, but does mean if you upload the - # same source record in two different files you WILL get duplicates. - # It is given two aruguments, self at the time of call and the index of the reocrd - # config.fill_in_blank_source_identifiers = ->(parser, index) { "b-#{parser.importer.id}-#{index}"} - # or use a uuid - # config.fill_in_blank_source_identifiers = ->(parser, index) { SecureRandom.uuid } - - # Field mappings - # Create a completely new set of mappings by replacing the whole set as follows - # config.field_mappings = { - # "Bulkrax::OaiDcParser" => { **individual field mappings go here*** } - # } - - # Add to, or change existing mappings as follows - # e.g. to exclude date - # config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true } - # - # # e.g. 
to add the required source_identifier field - # # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true } - # If you want Bulkrax to fill in source_identifiers for you, see below - - # To duplicate a set of mappings from one parser to another - # config.field_mappings["Bulkrax::OaiOmekaParser"] = {} - # config.field_mappings["Bulkrax::OaiDcParser"].each {|key,value| config.field_mappings["Bulkrax::OaiOmekaParser"][key] = value } - config.field_mappings['Bulkrax::OaiAdventistQdcParser'] = { - 'abstract' => { from: ['abstract'] }, - 'aark_id' => { from: ['aark_id'] }, - 'identifier' => { from: ['identifier'], source_identifier: true }, - 'bibliographic_citation' => { from: ['bibliographic_citation'] }, - 'creator' => { from: ['creator'] }, - 'contributor' => { from: ['contributor'] }, - 'edition' => { from: ['edition'] }, - 'resource_type' => { from: ['resource_type'] }, - 'issue_number' => { from: ['issue_number'] }, - 'language' => { from: ['language'] }, - 'description' => { from: ['description'] }, - 'pagination' => { from: ['pagination'] }, - 'extent' => { from: ['extent'], split: ';' }, - 'source' => { from: ['source'] }, - 'date_issued' => { from: ['date_issued'] }, - 'alt' => { from: ['geocode'] }, - 'publisher' => { from: ['publisher'], split: ';' }, - 'rights_statement' => { from: ['rights_statement'] }, - 'part_of' => { from: ['part_of'] }, - 'part' => { from: ['part_of'] }, - 'date_created' => { from: ['date_created'] }, - 'title' => { from: ['title'] }, - 'subject' => { from: ['subject'], split: ';' }, - 'volume_number' => { from: ['volume_number'] }, - 'keyword' => { from: ['keyword'], split: ';' }, - 'location' => { from: ['location'], split: ';' }, - 'model' => { from: ['model', 'work_type'] }, - 'remote_files' => { from: ['related_url'], split: ';', parsed: true }, - 'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, - 'video_embed' => { from: ['video_embed'] 
}, - 'refereed' => { from: ['peer_reviewed'] } - } - config.field_mappings['Bulkrax::CsvParser'] = { - 'abstract' => { from: ['description.abstract'] }, - 'aark_id' => { from: ['identifier.ark'] }, - 'identifier' => { from: ['identifier'], source_identifier: true }, - 'bibliographic_citation' => { from: ['identifier.bibliographicCitation'] }, - 'creator' => { from: ['creator'], split: ';' }, - 'contributor' => { from: ['contributor'], split: ';' }, - 'edition' => { from: ['title.release'] }, - 'resource_type' => { from: ['type'] }, - 'issue_number' => { from: ['relation.isPartOfIssue'] }, - 'language' => { from: ['language'], split: ';' }, - 'description' => { from: ['description'], split: ';' }, - 'pagination' => { from: ['format.extent'] }, - 'extent' => { from: ['format.extent'], split: ';' }, - 'source' => { from: ['source'], split: ';' }, - 'date_issued' => { from: ['date'] }, - 'alt' => { from: ['coverage.spatial'] }, - 'publisher' => { from: ['publisher'], split: ';' }, - 'rights_statement' => { from: ['rights'] }, - 'part_of' => { from: ['relation.isPartOf'], split: ';' }, - 'part' => { from: ['relation.isPartOf'] }, - 'date_created' => { from: ['date.other'] }, - 'title' => { from: ['title'] }, - 'subject' => { from: ['subject'], split: ';' }, - 'volume_number' => { from: ['relation.isPartOfVolume'] }, - 'keyword' => { from: ['keyword'], split: ';' }, - 'location' => { from: ['location'], split: ';' }, - 'model' => { from: ['work_type'] }, - 'remote_files' => { from: ['related_url'], split: ';', parsed: true }, - 'remote_url' => { from: ['official_url', 'remote_url'], split: ';' }, - 'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, - 'video_embed' => { from: ['video_embed'] }, - 'refereed' => { from: ['peer_reviewed'] } - } + Bulkrax.setup do |config| + ## + # By default this is the first registered curation concern. But based on tests and past + # configs, this should be 'GenericWork'. 
Note: The below value could change, but it should be + # explicit. + # + # See https://github.com/samvera/hyku/blob/07fde572f9152d513b13f71cae90dd4fdfbfba6c/config/initializers/hyrax.rb#L200-L202 + config.default_work_type = 'GenericWork' + + # Setting the available parsers for Adventist. + config.parsers = [ + { name: "OAI - Adventist Digital Library", class_name: "Bulkrax::OaiAdventistQdcParser", partial: "oai_adventist_fields" }, + { name: "CSV - Comma Separated Values", class_name: "Bulkrax::CsvParser", partial: "csv_fields" }, + ] + + # Should Bulkrax make up source identifiers for you? This allows round tripping + # and download errored entries to still work, but does mean if you upload the + # same source record in two different files you WILL get duplicates. + # It is given two arguments, self at the time of call and the index of the record + # config.fill_in_blank_source_identifiers = ->(parser, index) { "b-#{parser.importer.id}-#{index}"} + # or use a uuid + # config.fill_in_blank_source_identifiers = ->(parser, index) { SecureRandom.uuid } - config.field_mappings['Bulkrax::CsvParser'].merge!( - 'parents' => { from: ['parents'], split: /\s*[;|]\s*/, related_parents_field_mapping: true }, - 'children' => { from: ['children'], split: /\s*[;|]\s*/, related_children_field_mapping: true } - ) + # Field mappings + # Create a completely new set of mappings by replacing the whole set as follows + # config.field_mappings = { + # "Bulkrax::OaiDcParser" => { **individual field mappings go here*** } + # } - # Lambda to set the default field mapping - config.default_field_mapping = lambda do |field| - return if field.blank? - { - field.to_s => + # Add to, or change existing mappings as follows + # e.g. to exclude date + # config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true } + # + # # e.g. 
to add the required source_identifier field + # # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true } + # If you want Bulkrax to fill in source_identifiers for you, see below + + # To duplicate a set of mappings from one parser to another + # config.field_mappings["Bulkrax::OaiOmekaParser"] = {} + # config.field_mappings["Bulkrax::OaiDcParser"].each {|key,value| config.field_mappings["Bulkrax::OaiOmekaParser"][key] = value } + config.field_mappings['Bulkrax::OaiAdventistQdcParser'] = { + 'abstract' => { from: ['abstract'] }, + 'aark_id' => { from: ['aark_id'] }, + 'identifier' => { from: ['identifier'], source_identifier: true }, + 'bibliographic_citation' => { from: ['bibliographic_citation'] }, + 'creator' => { from: ['creator'] }, + 'contributor' => { from: ['contributor'] }, + 'edition' => { from: ['edition'] }, + 'resource_type' => { from: ['resource_type'] }, + 'issue_number' => { from: ['issue_number'] }, + 'language' => { from: ['language'] }, + 'description' => { from: ['description'] }, + 'pagination' => { from: ['pagination'] }, + 'extent' => { from: ['extent'], split: ';' }, + 'source' => { from: ['source'] }, + 'date_issued' => { from: ['date_issued'] }, + 'alt' => { from: ['geocode'] }, + 'publisher' => { from: ['publisher'], split: ';' }, + 'rights_statement' => { from: ['rights_statement'] }, + 'part_of' => { from: ['part_of'] }, + 'part' => { from: ['part_of'] }, + 'date_created' => { from: ['date_created'] }, + 'title' => { from: ['title'] }, + 'subject' => { from: ['subject'], split: ';' }, + 'volume_number' => { from: ['volume_number'] }, + 'keyword' => { from: ['keyword'], split: ';' }, + 'location' => { from: ['location'], split: ';' }, + 'model' => { from: ['model', 'work_type'] }, + 'remote_files' => { from: ['related_url'], split: ';', parsed: true }, + 'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, + 'video_embed' => { from: ['video_embed'] 
}, + 'refereed' => { from: ['peer_reviewed'] } + } + config.field_mappings['Bulkrax::CsvParser'] = { + 'abstract' => { from: ['description.abstract'] }, + 'aark_id' => { from: ['identifier.ark'] }, + 'identifier' => { from: ['identifier'], source_identifier: true }, + 'bibliographic_citation' => { from: ['identifier.bibliographicCitation'] }, + 'creator' => { from: ['creator'], split: ';' }, + 'contributor' => { from: ['contributor'], split: ';' }, + 'edition' => { from: ['title.release'] }, + 'resource_type' => { from: ['type'] }, + 'issue_number' => { from: ['relation.isPartOfIssue'] }, + 'language' => { from: ['language'], split: ';' }, + 'description' => { from: ['description'], split: ';' }, + 'pagination' => { from: ['format.extent'] }, + 'extent' => { from: ['format.extent'], split: ';' }, + 'source' => { from: ['source'], split: ';' }, + 'date_issued' => { from: ['date'] }, + 'alt' => { from: ['coverage.spatial'] }, + 'publisher' => { from: ['publisher'], split: ';' }, + 'rights_statement' => { from: ['rights'] }, + 'part_of' => { from: ['relation.isPartOf'], split: ';' }, + 'part' => { from: ['relation.isPartOf'] }, + 'date_created' => { from: ['date.other'] }, + 'title' => { from: ['title'] }, + 'subject' => { from: ['subject'], split: ';' }, + 'volume_number' => { from: ['relation.isPartOfVolume'] }, + 'keyword' => { from: ['keyword'], split: ';' }, + 'location' => { from: ['location'], split: ';' }, + 'model' => { from: ['work_type'] }, + 'remote_files' => { from: ['related_url'], split: ';', parsed: true }, + 'remote_url' => { from: ['official_url', 'remote_url'], split: ';' }, + 'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, + 'video_embed' => { from: ['video_embed'] }, + 'refereed' => { from: ['peer_reviewed'] } + } + + config.field_mappings['Bulkrax::CsvParser'].merge!( + 'parents' => { from: ['parents'], split: /\s*[;|]\s*/, related_parents_field_mapping: true }, + 'children' => { from: ['children'], split: 
/\s*[;|]\s*/, related_children_field_mapping: true } + ) + + # Lambda to set the default field mapping + config.default_field_mapping = lambda do |field| + return if field.blank? + { + field.to_s => { from: [field.to_s], split: false, @@ -129,43 +128,43 @@ excluded: false, default_thumbnail: false } - } - end - - # WorkType to use as the default if none is specified in the import - # Default is the first returned by Hyrax.config.curation_concerns - # config.default_work_type = MyWork - - # Path to store pending imports - # config.import_path = 'tmp/imports' - - # Path to store exports before download - # config.export_path = 'tmp/exports' - - # Server name for oai request header - # config.server_name = 'my_server@name.com' - - # Field_mapping for establishing a parent-child relationship (FROM parent TO child) - # This can be a Collection to Work, or Work to Work relationship - # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect - # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' - # Example: - # { - # 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents', - # 'Bulkrax::CsvEntry' => 'children' - # } - # By default no parent-child relationships are added - # config.parent_child_field_mapping = { } - - # Field_mapping for establishing a collection relationship (FROM work TO collection) - # This value IS NOT used for OAI, so setting the OAI parser here will have no effect - # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' - # The default value for CSV is collection - # Add/replace parsers, for example: - # config.collection_field_mapping['Bulkrax::RdfEntry'] = 'http://opaquenamespace.org/ns/set' - - # Properties that should not be used in imports/exports. They are reserved for use by Hyrax. 
- # config.reserved_properties += ['my_field'] + } end + + # WorkType to use as the default if none is specified in the import + # Default is the first returned by Hyrax.config.curation_concerns + # config.default_work_type = MyWork + + # Path to store pending imports + # config.import_path = 'tmp/imports' + + # Path to store exports before download + # config.export_path = 'tmp/exports' + + # Server name for oai request header + # config.server_name = 'my_server@name.com' + + # Field_mapping for establishing a parent-child relationship (FROM parent TO child) + # This can be a Collection to Work, or Work to Work relationship + # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect + # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' + # Example: + # { + # 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents', + # 'Bulkrax::CsvEntry' => 'children' + # } + # By default no parent-child relationships are added + # config.parent_child_field_mapping = { } + + # Field_mapping for establishing a collection relationship (FROM work TO collection) + # This value IS NOT used for OAI, so setting the OAI parser here will have no effect + # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' + # The default value for CSV is collection + # Add/replace parsers, for example: + # config.collection_field_mapping['Bulkrax::RdfEntry'] = 'http://opaquenamespace.org/ns/set' + + # Properties that should not be used in imports/exports. They are reserved for use by Hyrax. 
+ # config.reserved_properties += ['my_field'] + end end diff --git a/docker-compose.yml b/docker-compose.yml index c2a3c6e..7ebc94c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,18 +19,22 @@ x-app: &app - GOOD_JOB_CLEANUP_DISCARDED_JOBS=false - GOOD_JOB_CLEANUP_INTERVAL_SECONDS=86400 - GOOD_JOB_CLEANUP_PRESERVED_JOBS_BEFORE_SECONDS_AGO=604800 - - HYRAX_ACTIVE_JOB_QUEUE=good_job - HYKU_ADMIN_HOST=adl.test - - VIRTUAL_HOST=.adl.test - HYKU_DEFAULT_HOST=%{tenant}.adl.test - HYKU_ROOT_HOST=adl.test - - VIRTUAL_PORT=3000 + - HYRAX_ACTIVE_JOB_QUEUE=good_job volumes: - node_modules:/app/samvera/hyrax-webapp/node_modules:cached - uploads:/app/samvera/hyrax-webapp/public/uploads:cached - assets:/app/samvera/hyrax-webapp/public/assets:cached - cache:/app/samvera/hyrax-webapp/tmp/cache:cached - .:/app/samvera + env_file: + - .env + networks: + internal: + + x-app-worker: &app-worker <<: *app @@ -49,12 +53,21 @@ x-app-worker: &app-worker image: ghcr.io/scientist-softserv/adventist_knapsack/worker:${TAG:-latest} # Uncomment command to access container with out starting bin/worker. 
Useful for debugging or updating Gemfile.lock # command: sleep infinity - volumes: - - node_modules:/app/samvera/hyrax-webapp/node_modules:cached - - uploads:/app/samvera/hyrax-webapp/public/uploads:cached - - assets:/app/samvera/hyrax-webapp/public/assets:cached - - cache:/app/samvera/hyrax-webapp/tmp/cache:cached - - .:/app/samvera + depends_on: + check_volumes: + condition: service_completed_successfully + initialize_app: + condition: service_completed_successfully + db: + condition: service_started + solr: + condition: service_started + fcrepo: + condition: service_started + redis: + condition: service_started + zoo: + condition: service_started volumes: assets: @@ -103,16 +116,49 @@ services: extends: file: hyrax-webapp/docker-compose.yml service: base - image: ghcr.io/samvera/hyku/base:${TAG:-latest} + image: ghcr.io/samvera/hyku/base:${BASE_TAG:-latest} command: bash -l -c "echo 'base is only used for building base images, which in turn reduces image build times. It does not need to be run'" web: <<: *app - extends: - file: hyrax-webapp/docker-compose.yml - service: web # Uncomment command to access container with out starting bin/web. 
Useful for debugging or updating Gemfile.lock # command: sleep infinity + environment: + - AUXILIARY_QUEUE_TENANTS="sdapi" # NOTE(review): in list-form entries the double quotes may be passed through as part of the value - confirm this is intended + - AWS_REGION=us-east-1 + - AWS_S3_BUCKET=space-stone-production-preprocessedbucketf21466dd-15sun4xy658nh + - BUNDLE_DISABLE_LOCAL_BRANCH_CHECK=true + - BUNDLE_LOCAL__HYKU_KNAPSACK=/app/samvera # This line is what makes the knapsack include use the local code instead of the remote gem + - FITS_SERVLET_URL=http://fits:8080/fits + - GOOD_JOB_CLEANUP_DISCARDED_JOBS=false + - GOOD_JOB_CLEANUP_INTERVAL_SECONDS=86400 + - GOOD_JOB_CLEANUP_PRESERVED_JOBS_BEFORE_SECONDS_AGO=604800 + - HYKU_ADMIN_HOST=adl.test + - HYKU_DEFAULT_HOST=%{tenant}.adl.test + - HYKU_ROOT_HOST=adl.test + - HYRAX_ACTIVE_JOB_QUEUE=good_job + - VIRTUAL_HOST=.adl.test + - VIRTUAL_PORT=3000 + + depends_on: + db: + condition: service_started + solr: + condition: service_started + fcrepo: + condition: service_started + redis: + condition: service_started + zoo: + condition: service_started + check_volumes: + condition: service_started + chrome: + condition: service_started + worker: + condition: service_started + initialize_app: + condition: service_completed_successfully worker: <<: *app-worker diff --git a/hyrax-webapp b/hyrax-webapp index 571c2de..1efcc99 160000 --- a/hyrax-webapp +++ b/hyrax-webapp @@ -1 +1 @@ -Subproject commit 571c2debe9015f3e6ec92d8ced8a77be1a3b193d +Subproject commit 1efcc99159a049c5604f71ce29d5cf3b096dbe93