Skip to content

Commit

Permalink
Merge pull request #380 from notch8/fix-resplit
Browse files Browse the repository at this point in the history
Fix pdf resplitting
  • Loading branch information
laritakr authored Jan 29, 2025
2 parents 55cbaaf + 4b43117 commit 98faa0a
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 17 deletions.
7 changes: 4 additions & 3 deletions app/controllers/iiif_print/split_pdfs_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ class SplitPdfsController < ApplicationController
before_action :authenticate_user!

def create
@file_set = FileSet.where(id: params[:file_set_id]).first
@file_set = IiifPrint.find_by(id: params[:file_set_id])
authorize_create_split_request!(@file_set)
IiifPrint::Jobs::RequestSplitPdfJob.perform_later(file_set: @file_set, user: current_user)

IiifPrint::Jobs::RequestSplitPdfJob.perform_later(file_set_id: @file_set.to_param, user: current_user)
respond_to do |wants|
wants.html { redirect_to polymorphic_path([main_app, @file_set]), notice: t("iiif_print.file_set.split_submitted", id: @file_set.id) }
wants.json { render json: { id: @file_set.id, to_param: @file_set.to_param }, status: :ok }
Expand All @@ -27,7 +28,7 @@ def authorize_create_split_request!(file_set)
# Rely on CanCan's authorize! method. We could add the :split_pdf action to the ability
# class. But we're pigging backing on the idea that you can do this if you can edit the work.
authorize!(:edit, file_set)
raise "Expected #{file_set.class} ID=#{file_set.id} #to_param=#{file_set.to_param} to be a PDF. Instead found mime_type of #{file_set.mime_type}." unless file_set.pdf?
raise "Expected #{file_set.class} ID=#{file_set.id} #to_param=#{file_set.to_param} to be a PDF. Instead found mime_type of #{file_set.mime_type}." unless IiifPrint.pdf?(file_set)

work = IiifPrint.parent_for(file_set)
raise WorkNotConfiguredToSplitFileSetError.new(file_set: file_set, work: work) unless work&.iiif_print_config&.pdf_splitter_job&.presence
Expand Down
14 changes: 7 additions & 7 deletions app/jobs/iiif_print/jobs/request_split_pdf_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ module Jobs
# Encapsulates logic for cleanup when the PDF is destroyed after pdf splitting into child works
class RequestSplitPdfJob < IiifPrint::Jobs::ApplicationJob
##
# @param file_set [FileSet]
# @param file_set_id [FileSet id]
# @param user [User]
# rubocop:disable Metrics/MethodLength
def perform(file_set:, user:)
return true unless file_set.pdf?

def perform(file_set_id:, user:)
file_set = IiifPrint.find_by(id: file_set_id)
return true unless IiifPrint.pdf?(file_set)
work = IiifPrint.parent_for(file_set)

# Woe is ye who changes the configuration of the model, thus removing the splitting.
Expand All @@ -18,11 +18,11 @@ def perform(file_set:, user:)
# clean up any existing spawned child works of this file_set
IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
file_set: file_set,
work: work
work: work,
user: user
)

location = Hyrax::WorkingDirectory.find_or_retrieve(file_set.files.first.id, file_set.id)

location = IiifPrint.pdf_path_for(file_set: file_set)
IiifPrint.conditionally_submit_split_for(work: work, file_set: file_set, locations: [location], user: user)
end
# rubocop:enable Metrics/MethodLength
Expand Down
1 change: 1 addition & 0 deletions lib/iiif_print.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class << self
:solr_construct_query,
:solr_name,
:solr_query,
:pdf_path_for,
to: :persistence_adapter
)
end
Expand Down
4 changes: 4 additions & 0 deletions lib/iiif_print/persistence_layer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ def self.copy_derivatives_from_data_store(stream:, directives:)
def self.extract_text_for(file_set:)
raise NotImplementedError, "#{self}.{__method__}"
end

def self.pdf_path_for(file_set:)
raise NotImplementedError, "#{self}.{__method__}"
end
end
end
end
9 changes: 9 additions & 0 deletions lib/iiif_print/persistence_layer/active_fedora_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,15 @@ def self.copy_derivatives_from_data_store(*)
def self.extract_text_for(file_set:)
IiifPrint.config.all_text_generator_function.call(object: file_set) || ''
end

##
# Location of the file for resplitting
#
# @param [FileSet] an ActiveFedora fileset
# @return [String] location of the original file
def self.pdf_path_for(file_set:)
Hyrax::WorkingDirectory.find_or_retrieve(file_set.files.first.id, file_set.id)
end
end
end
end
39 changes: 32 additions & 7 deletions lib/iiif_print/persistence_layer/valkyrie_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,28 @@ def self.solr_name(field_name)
Hyrax.config.index_field_mapper.solr_name(field_name.to_s)
end

# rubocop:disable Lint/UnusedMethodArgument
# NOTE: this isn't the most efficient method, but it is the most reliable.
# Attribute 'split_from_pdf_id' is saved in Valkyrie as a string rather than as { id: string },
# so we can't use the 'find_inverse_references_by' query.
# Additionally, the attribute does not exist on all child works, as it was added later, so using
# a child work's title allows us to find child works when the attribute isn't present.
# Building a custom query to find these child works directly via the attribute would be more efficient.
# However, it would require more effort for a lesser-used feature, and would not allow for the fallback
# of finding child works by title.
def self.destroy_children_split_from(file_set:, work:, model:, user:)
# rubocop:enable Lint/UnusedMethodArgument
# look for child records by the file set id they were split from
Hyrax.query_service.find_inverse_references_by(resource: file_set, property: :split_from_pdf_id, model: model).each do |child|
Hyrax.persister.delete(resource: child)
Hyrax.indexing_service.delete(resource: child)
Hyrax.publisher.publish('object.deleted', object: child, user: user)
all_child_works = Hyrax.custom_queries.find_child_works(resource: work)
return if all_child_works.blank?
# look first for children by the file set id they were split from
children = all_child_works.select { |m| m.split_from_pdf_id == file_set.id }
if children.blank?
# find works where file name and work `to_param` are both in the title
children = all_child_works.select { |m| m.title.include?(file_set.label) && m.title.include?(work.to_param) }
end
return if children.blank?
children.each do |rcd|
Hyrax.persister.delete(resource: rcd)
Hyrax.index_adapter.delete(resource: rcd)
Hyrax.publisher.publish('object.deleted', object: rcd, user: user)
end
true
end
Expand Down Expand Up @@ -176,6 +190,17 @@ def self.extract_text_for(file_set:)
return if text_fm.nil?
text_fm.content
end

##
# Location of the file for resplitting
#
# @param [Hyrax::FileSet] a Valkyrie fileset
# @return [String] location of the original file
def self.pdf_path_for(file_set:)
file = file_set.original_file
return '' unless file.pdf?
file.file.disk_path.to_s
end
end
end
end

0 comments on commit 98faa0a

Please sign in to comment.