-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconverter.py
41 lines (36 loc) · 1.54 KB
/
converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os, re
import json
import pdb
import collections
from django.utils.text import slugify
# Provenance metadata copied verbatim into every generated work record
# (stored under its 'source' and 'sourceLink' keys).
source = "Perseus"
sourceLink = "http://www.perseus.tufts.edu/hopper/opensource/download"
def main():
    """Convert every ``.txt`` file below the current directory to CLTK JSON.

    Walks the tree rooted at ``.``. For each text file it builds a work
    record (titles, author placeholder, source, source link, language) whose
    ``text`` maps the zero-based position of each non-blank, whitespace-
    stripped line to that line, and writes the record to
    ``cltk_json/<source>__<title>__<language>.json``. The three file-name
    parts are passed through ``slugify``.

    The title is ``<directory name>_<file stem>`` in title case; files that
    sit directly in the walk root use the file stem alone.
    """
    out_dir = 'cltk_json'
    # exist_ok removes the window between a separate exists() check and the
    # directory creation.
    os.makedirs(out_dir, exist_ok=True)

    for root, dirs, files in os.walk("."):
        # os.path understands the platform separator, so this also works
        # where os.walk yields backslash-separated paths.
        dir_name = os.path.basename(os.path.normpath(root))
        for fname in files:
            # Require a real ".txt" extension, not merely a name ending in "txt".
            if not fname.endswith('.txt'):
                continue

            # splitext drops only the final extension; cutting at the first
            # '.' would empty the title for the walk root ('.') and truncate
            # names that contain dots, making distinct files collide on one
            # output name.
            stem = os.path.splitext(fname)[0]
            if dir_name == os.curdir:
                raw_title = stem
            else:
                raw_title = dir_name + '_' + stem
            title = raw_title.title()

            # Read inside a context manager so the handle is closed promptly.
            # NOTE(review): UTF-8 is assumed for the input texts so decoding
            # does not depend on the machine's locale - confirm for the corpus.
            with open(os.path.join(root, fname), encoding='utf-8') as src:
                stripped = (raw_line.strip() for raw_line in src.read().splitlines())
                lines = [line for line in stripped if line]

            work = {
                'originalTitle': title,
                'englishTitle': title,
                'author': "Not Available",
                'source': source,
                'sourceLink': sourceLink,
                'language': 'Old_Norse',
                # Integer keys; json.dump serialises them as strings.
                'text': dict(enumerate(lines)),
            }

            out_name = (
                slugify(work['source'])
                + '__' + slugify(work['englishTitle'])
                + '__' + slugify(work['language'])
                + '.json'
            )
            out_name = out_name.replace(" ", "")
            with open(os.path.join(out_dir, out_name), 'w') as f:
                json.dump(work, f)
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()