-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbagcheck.py
103 lines (91 loc) · 3.62 KB
/
bagcheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import bagit
import os
import json
import argparse
import datetime
def get_baglist(filepath, *args):
    """Load the bag list stored at *filepath*, or start a new one.

    When *filepath* exists it is parsed as JSON and returned unchanged; any
    extra positional arguments are ignored. Otherwise a fresh bag list is
    created whose ``'base directory'`` is the first extra argument.

    Args:
        filepath: Path of a new or existing JSON bag list.
        *args: Optional. ``args[0]`` is the base directory recorded in a
            newly created bag list.

    Returns:
        dict: The parsed file content, or a new dict with the keys
        ``'date created'``, ``'base directory'`` and ``'bags'``.

    Raises:
        ValueError: If *filepath* does not exist and no base directory
            (or ``None``) was supplied.
    """
    if os.path.exists(filepath):
        with open(filepath) as f:
            return json.load(f)

    # A new bag list is useless without a base directory: fail here with a
    # clear message instead of an IndexError on args[0], or a TypeError
    # later when the caller tries to chdir into None.
    if not args or args[0] is None:
        raise ValueError(
            'bag list {!r} does not exist and no base directory was '
            'given to create it'.format(filepath))

    return {
        'date created': datetime.datetime.now().isoformat(),
        'base directory': args[0],
        'bags': {},
    }
def find_bags(baglist):
    """Return the directories below the base directory that hold a bag.

    A directory counts as a bag when it directly contains a file named
    ``bagit.txt``. The whole tree under ``baglist['base directory']`` is
    walked.

    Args:
        baglist: Bag list dict; only its ``'base directory'`` entry is read.

    Returns:
        list: Paths of the matching directories, relative to the base
        directory, in the order ``os.walk`` visits them.
    """
    base = baglist['base directory']
    return [
        os.path.relpath(dirpath, start=base)
        for dirpath, _subdirs, filenames in os.walk(base)
        if 'bagit.txt' in filenames
    ]
def report(baglist, all=True):
    """Print the most recent check of every bag as comma-separated lines.

    A header line is printed first, followed by one line per reported bag
    holding its directory, the date and status of its latest check, that
    check's ``'details'`` entry (``None`` when absent) and the number of
    days elapsed since the check.

    Args:
        baglist: Bag list dict; ``baglist['bags']`` maps each bag directory
            to its list of check records, oldest first.
        all: When true, report every bag. When false, leave out bags whose
            latest status is ``'VALID'``. (The name shadows the builtin but
            is part of the function's keyword interface.)
    """
    columns = [
        'directory', 'check_date', 'check_status',
        'details', 'since_last_check']
    print(', '.join(columns))

    for bag, checks in baglist['bags'].items():
        last_check = checks[-1]
        # Filtered mode: bags that passed their latest check are omitted.
        if not all and last_check['status'] == 'VALID':
            continue

        checked_at = datetime.datetime.fromisoformat(last_check['date'])
        elapsed = datetime.datetime.now() - checked_at
        row = [
            bag,
            checked_at.isoformat(),
            last_check['status'],
            str(last_check.get('details')),
            "{i.days} days since last check".format(i=elapsed),
        ]
        print(', '.join(row))
def validate(baglist, baglistfile):
    """Validate every bag under the base directory and save the results.

    Each bag returned by ``find_bags`` is opened with ``bagit.Bag`` and
    validated. The outcome is appended to that bag's history in
    ``baglist['bags']`` as a ``'VALID'`` or ``'INVALID'`` record (the latter
    with a ``'details'`` entry). Bags that are listed in the bag list but
    were not found on disk get a ``'MISSING'`` record. Finally the updated
    bag list is written to *baglistfile* as JSON.

    Args:
        baglist: Bag list dict with ``'base directory'`` and ``'bags'``
            entries; it is modified in place.
        baglistfile: Path the updated bag list is written to.
    """
    bags = find_bags(baglist)
    history = baglist['bags']

    for bagdir in bags:
        if bagdir not in history:
            print('New bag: ', bagdir)
            history[bagdir] = []
        try:
            bag = bagit.Bag(os.path.join(baglist['base directory'], bagdir))
            bag.validate()
        except (bagit.BagError, bagit.BagValidationError) as e:
            print(e)
            if hasattr(e, 'message'):
                details = e.message
            elif e.args:
                details = e.args[0]
            else:
                # An exception raised without arguments has no args[0];
                # fall back to its string form rather than crash.
                details = str(e)
            history[bagdir].append({
                'date': datetime.datetime.now().isoformat(),
                'status': 'INVALID', 'details': details})
        else:
            print(bagdir, 'is valid')
            history[bagdir].append({
                'date': datetime.datetime.now().isoformat(),
                'status': 'VALID'})

    # Set membership keeps the missing-bag scan linear in the number of bags.
    found = set(bags)
    missing = [bag for bag in history if bag not in found]
    for bag in missing:
        print('Missing: ', bag)
        history[bag].append({
            'date': datetime.datetime.now().isoformat(),
            'status': 'MISSING'})

    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization failure must not be able to wipe the existing history.
    serialized = json.dumps(baglist, indent=1)
    with open(baglistfile, 'w') as f:
        f.write(serialized)
# Command-line entry point: validate the bags below a base directory, or
# report on the checks already recorded in a bag list.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Validate some bags.')
    parser.add_argument(
        'directory', metavar='i', type=str, nargs='?',
        help='the base directory with your bags. '
        'Not required if using an existing baglist')
    parser.add_argument(
        '--baglist', dest='baglist', type=str, default='bags.json',
        help='Location of a new or existing bag list. If it already exists, '
        'directory will be ignored in favor of the base directory defined in '
        'the list.')
    parser.add_argument(
        '--report', action='store_true',
        help='report on latest validations in csv (pipe to a text file)')
    # store_false: args.failed is True by default (report everything) and
    # becomes False when --failed is given (report only non-VALID bags).
    parser.add_argument(
        '--failed', action='store_false', default=True,
        help='filter report to failed checks')
    args = parser.parse_args()

    directory = None
    if args.directory is not None:
        # Resolve to an absolute path BEFORE changing into it. A relative
        # path stored as the base directory would be resolved a second time
        # by the chdir below, from inside the directory itself.
        directory = os.path.abspath(args.directory)
        os.chdir(directory)

    # A relative --baglist is interpreted from here (the given directory, or
    # the starting directory when none was given). Pin it to an absolute path
    # so it is written back to the same file after the chdir below.
    baglist_path = os.path.abspath(args.baglist)

    if directory is None and not os.path.exists(baglist_path):
        parser.error(
            'a base directory is required because the bag list does not '
            'exist yet: ' + baglist_path)

    baglist = get_baglist(baglist_path, directory)
    os.chdir(baglist['base directory'])
    if args.report:
        report(baglist, all=args.failed)
    else:
        validate(baglist, baglist_path)