Skip to content

Commit

Permalink
Added exception for invalid XML
Browse files: browse the repository at this point in the history
  • Loading branch information
crugas committed Jun 10, 2020
1 parent 41ba715 commit f204a76
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 23 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,4 +16,6 @@ dist
build
setup_script-001.iss
setup_script-002.iss
Output
Output
invalid_eads
harvard-schematron.xml
39 changes: 23 additions & 16 deletions as_xtf_GUI.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -443,9 +443,9 @@ def get_aspace_log(defaults, xtf_checkbox=True):
correct_creds = False
close_program = False
while correct_creds is False:
asplog_col1 = [[sg.Text("Enter your ArchivesSpace username:", font=("Roboto", 12))],
[sg.Text("Enter your ArchivesSpace password:", font=("Roboto", 12))],
[sg.Text("Enter your ArchivesSpace API URL:", font=("Roboto", 12))]]
asplog_col1 = [[sg.Text("Enter your ArchivesSpace username:", font=("Roboto", 11))],
[sg.Text("Enter your ArchivesSpace password:", font=("Roboto", 11))],
[sg.Text("Enter your ArchivesSpace API URL:", font=("Roboto", 11))]]
asplog_col2 = [[sg.InputText(focus=True, key="_ASPACE_UNAME_")],
[sg.InputText(password_char='*', key="_ASPACE_PWORD_")],
[sg.InputText(defaults["as_api"], key="_ASPACE_API_")]]
Expand Down Expand Up @@ -519,11 +519,11 @@ def get_xtf_log(defaults):
correct_creds = False
close_program = False
while correct_creds is False:
xtflog_col1 = [[sg.Text("Enter your XTF username:", font=("Roboto", 12))],
[sg.Text("Enter your XTF password:", font=("Roboto", 12))],
[sg.Text("Enter XTF Hostname:", font=("Roboto", 12))],
[sg.Text("Enter XTF Remote Path:", font=("Roboto", 12))],
[sg.Text("Enter XTF Indexer Path:", font=("Roboto", 12))]]
xtflog_col1 = [[sg.Text("Enter your XTF username:", font=("Roboto", 11))],
[sg.Text("Enter your XTF password:", font=("Roboto", 11))],
[sg.Text("Enter XTF Hostname:", font=("Roboto", 11))],
[sg.Text("Enter XTF Remote Path:", font=("Roboto", 11))],
[sg.Text("Enter XTF Indexer Path:", font=("Roboto", 11))]]
xtflog_col2 = [[sg.InputText(focus=True, key="_XTF_UNAME_")],
[sg.InputText(password_char='*', key="_XTF_PWORD_")],
[sg.InputText(defaults["xtf_default"]["xtf_host"], key="_XTF_HOSTNAME_")],
Expand Down Expand Up @@ -614,16 +614,23 @@ def get_eads(input_ids, defaults, cleanup_options, repositories, client, values_
if defaults["ead_export_default"]["_CLEAN_EADS_"] is True:
if defaults["ead_export_default"]["_KEEP_RAW_"] is True:
print("Cleaning up EAD record...", end='', flush=True)
results = clean.cleanup_eads(resource_export.filepath, cleanup_options,
defaults["ead_export_default"]["_OUTPUT_DIR_"],
keep_raw_exports=True)
print("Done")
valid, results = clean.cleanup_eads(resource_export.filepath, cleanup_options,
defaults["ead_export_default"]["_OUTPUT_DIR_"],
keep_raw_exports=True)
if valid:
print("Done")
print(results)
else:
print("XML validation error\n" + results)
else:
print("Cleaning up EAD record...", end='', flush=True)
results = clean.cleanup_eads(resource_export.filepath, cleanup_options,
defaults["ead_export_default"]["_OUTPUT_DIR_"])
print("Done")
print(results)
valid, results = clean.cleanup_eads(resource_export.filepath, cleanup_options,
defaults["ead_export_default"]["_OUTPUT_DIR_"])
if valid:
print("Done")
print(results)
else:
print("XML validation error\n" + results)
else:
print(resource_export.error + "\n")
else:
Expand Down
16 changes: 11 additions & 5 deletions cleanup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -358,16 +358,22 @@ def cleanup_eads(filepath, custom_clean, output_dir="clean_eads", keep_raw_expor
custom_clean (list): strings as passed from as_xtf_GUI.py that determines what methods will be run against the
lxml element when running the clean_suite() method. The user can specify what they want cleaned in
as_xtf_GUI.py, so this is how those specifications are passed.
output_dir (str, optional): filepath of where the EAD record should be sent after cleaning, as specified by the user ("clean_eads" is default)
keep_raw_exports (bool, optional): if a user in as_xtf_GUI.py specifies to keep the exports that come from
output_dir (str): filepath of where the EAD record should be sent after cleaning, as specified by the user ("clean_eads" is default)
keep_raw_exports (bool): if a user in as_xtf_GUI.py specifies to keep the exports that come from
as_export.py, this parameter will prevent the function from deleting those files in source_eads.
Returns:
results (str): filled with result information when methods are performed
"""
filename = Path(filepath).name # get file name + extension
valid_err = ""
parser = etree.XMLParser(remove_blank_text=True, ns_clean=True) # clean up redundant namespace declarations
tree = etree.parse(filepath, parser=parser)
try:
tree = etree.parse(filepath, parser=parser)
except:
valid_err += "There was an error with the xml data. The file is saved in {}\n\n".format(Path(filepath).parent)
valid_err += "-" * 135
return False, valid_err
ead_root = tree.getroot()
ead = EADRecord(ead_root)
clean_ead, results = ead.clean_suite(ead, custom_clean)
Expand All @@ -383,7 +389,7 @@ def cleanup_eads(filepath, custom_clean, output_dir="clean_eads", keep_raw_expor
os.remove(filepath)
if keep_raw_exports is False: # prevents program from rerunning cleanup on cleaned files
os.remove(filepath)
return results
return True, results
else:
results += "\nKeeping raw ASpace exports in {}\n".format(output_dir)
return results
return True, results
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
loguru==0.5.0
lxml==4.5.1
paramiko==2.7.1
PySimpleGUI==4.19.0.2
PySimpleGUI==4.20.0
scp==0.13.2

0 comments on commit f204a76

Please sign in to comment.