-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcards.py
63 lines (60 loc) · 2.27 KB
/
cards.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import json
from lxml import etree
# This class finds all information about a card
class CardHandler:
    """Gather the data stored on disk for one card (document) of a dataset.

    After construction the instance exposes:

    * ``dict``      -- content of ``<dataset>/documents/Doc<N>.json`` (only
      read for the ``'cci'`` dataset, otherwise an empty dict).
    * ``nodes``     -- result of the XPath query ``/html/body/p`` on
      ``<dataset>/documents/Doc<N>.html``.
    * ``size``      -- size of that HTML file as reported by
      ``os.path.getsize``.
    * ``key_words`` -- ``', '``-joined names of the ``cci/indexes/*.idx``
      files that contain a line equal to the card number (``'cci'`` dataset
      only, otherwise an empty string).

    If a required file cannot be opened, a message is printed and loading
    stops; attributes that were not loaded keep their empty defaults
    (``{}``, ``[]``, ``0``, ``''``).
    """

    # Index files consulted for key words, in the order they are reported.
    _CCI_INDEXES = (
        'Cardio-Loc',
        'CHF-Words',
        'DECLINE',
        'dementia',
        'MentalStatus',
        'MI-Words',
        'PVD-Words',
        'FAMILY',
    )

    def __init__(self, dataset, number_of_card):
        """Load the JSON, HTML and key-word data of one card.

        Args:
            dataset: Name of the dataset directory (e.g. ``'cci'``).
            number_of_card: Card number used in the ``Doc<N>`` file names;
                a string, or anything ``str()`` turns into that number.
        """
        card_id = str(number_of_card)

        # Set defaults first so that an early return on a missing file still
        # leaves every attribute defined.
        self.dict = {}
        self.nodes = []
        self.size = 0
        self.key_words = ''

        if dataset == 'cci' and not self._load_json(dataset, card_id):
            return
        if not self._load_html(dataset, card_id):
            return
        if dataset == 'cci':
            self.key_words = ', '.join(self._find_key_words(card_id))

    def _load_json(self, dataset, card_id):
        """Read ``Doc<card_id>.json`` into ``self.dict``; False if it cannot be opened."""
        json_file_name = dataset + '/documents/Doc' + card_id + '.json'
        try:
            json_file = open(json_file_name, 'r')
        except IOError:
            print('No such file or directory: ' + json_file_name)
            return False
        # The context manager closes the file even when the JSON is malformed.
        with json_file:
            self.dict = json.load(json_file)
        return True

    def _load_html(self, dataset, card_id):
        """Parse ``Doc<card_id>.html`` into ``self.nodes`` / ``self.size``; False on I/O error."""
        doc_name = dataset + '/documents/Doc' + card_id + '.html'
        html_parser = etree.HTMLParser(remove_comments=True)
        try:
            with open(doc_name, 'rb') as inp:
                tree = etree.parse(inp, html_parser)
            self.nodes = tree.xpath('/html/body/p')
            if card_id == '1':
                print('nodes="' + str(len(self.nodes)) + '"')
            self.size = os.path.getsize(doc_name)
        except IOError:
            print('No such file or directory: ' + doc_name)
            return False
        return True

    def _find_key_words(self, card_id):
        """Return the index names whose ``.idx`` file has a line equal to ``card_id``."""
        keys = []
        for tax in self._CCI_INDEXES:
            idx_name = 'cci/indexes/' + tax + '.idx'
            try:
                idx_file = open(idx_name)
            except IOError:
                # A missing index is reported and skipped, not fatal.
                print('No such file or directory: ' + idx_name)
                continue
            with idx_file:
                # any() stops at the first matching line.
                if any(card_id == line.strip() for line in idx_file):
                    keys.append(tax)
        return keys