-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcheck_add_vr.py
188 lines (146 loc) · 6.42 KB
/
check_add_vr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
'''
This code checks each entry in the dicom_ontology.owl file for an
explicit VR assignment. The issue is that the "new" entries -
the entries/tags not in the original set that were scraped from
the NEMA website and instantiated in Neurolex - do not have
VR values attached. The reason for this is that in the DICOM
standard documentation a listing of all of the tags are in Parts
06 and 07, but these lists do not include the definitions. The
definitions are given in tables in other Parts, so have to
be extracted from those tables.
This code checks to see if an entry specifies the VR value and
if not, retrieves it from the file
/home/karl/Work/INCF/XML_code/dicom_dict_vr.py
which was created by the code:
/home/karl/Work/INCF/XML_code/vr_generate_dict.py
This code skips reads lines until it finds
a line containing "Datatype Properties" and continues to read lines
until is finds a line that contains "dicom#dicom". It collects
lines in the variable "entry" until it encounters a line containing
" ." which is the ending character for an entry. If it finds
a line containing "dicom:VR" then it writes the entry after removing
an extra blank line that was written into the original owl file as
the line previous to the line containing the VR info. The code then
writes that entry to the output file and then reads on, collecting the
next entry. If the entry does not contain VR line, it queries the
dicom_dict_vr.py file to find the VR value and writes a line
containing the VR value in the correct format.
Note that since the code starts collecting entries with the "Datatype
Properties" section, everything above that needs to be pasted into
the resulting file to make a complete owl file. Also, the Class section
will be unchanged since there are no VR values for these terms
(since they are not official DICOM tags, but terms I extracted from the
official Part documents). The code checks for the presence of "xxxx" in
the dicom tag. If that string is in the tag then just write the entry
unchanged into the output file.
Sample Complete Entry
------------------------------------------------
### http://purl.org/nidash/dicom#dicom_00280011
dicom:dicom_00280011 rdf:type owl:DatatypeProperty ;
rdfs:label "Columns"^^xsd:string ;
obo:IAO_0000114 obo:IAO_0000428 ;
obo:IAO_0000115 "Number of columns in the image."^^xsd:string ;
dicom:dicom_xxxx0065 "(0028,0011)"^^xsd:string ;
dicom:VR "US"^^xsd:string ;
rdfs:subClassOf dc:identifier .
2018-09-04 - started
2018-09-14 - ran on full owl file and checked result into GitHub repo
Karl Helmer
Athinoula A. Martinos Center for Biomedical Imaging
Massachusetts General Hospital, 2018
'''
import os, sys
import re
import ast
#************************************************
#input parameters
inDir = '/home/karl/Work/INCF/dicom-ontology/'
inFilename = 'dicom_ontology.owl'
outDir = '/home/karl/Work/INCF/dicom-ontology/'
outFilename = 'dicom_ontology_new.owl'
vrDir = '/home/karl/Work/INCF/XML_code/'
vrFilename = 'dicom_dict_vr.dict'
startEntry = 'dicom#dicom'
endEntry = ' .'
startPlace = 'Datatype Properties'
#************************************************
def search_vr(entry):
#make 1 string rather than searching in each string individually
#this is faster than using some version of "any"
combined = ' '.join(entry)
test = 'dicom:VR' in combined
return test
def get_tag(entry):
# check each line in entry for the dicom tag
for e in entry:
if 'rdf:type' in e:
t = re.search('dicom_(.+?) ', e)
tag = t.group(1)
if tag:
#print "The DICOM tag is: ", tag
return tag
else:
#this will crash the program since no tag value is returned
print "no dicom tag value found in: ", entry
def get_vr(vrDir, vrFilename, tag):
if tag:
vrF = open(vrDir+vrFilename, 'r').read()
dicomDict = ast.literal_eval(vrF)
if tag:
vr = dicomDict.get(tag,'')[0]
#print "vr value is = ", vr
return vr
else:
print "vr value not found for tag = ", tag
return None
def add_vr_to_entry(vr, entry):
vrLine = ' dicom:VR "{}"^^xsd:string ;\n'.format(vr)
entry.insert(4,vrLine)
entry.insert(5,'\n')
return entry
def write_entry(entry, outFile):
for line in entry:
outFile.write(line)
outFile.write('\n\n')
def remove_sequential_blanks_in_entry(entry):
for i in range(len(entry)-1):
if entry[i] == entry[i+1]:
del entry[i]
break
return entry
def main():
# open the dicom ontology file and start reading
with open(inDir+inFilename, 'r') as inFile, open(outDir+outFilename, 'w') as outFile:
entry = []
copy = False
dt = False
for line in inFile:
if startPlace in line: #find "Datatype Properties" line and start here
dt = True
print "starting place is:", startPlace
if dt == True: #start check after Datatype Prop line
if startEntry in line:
copy = True
#print 'start of entry'
if endEntry in line:
copy = False
entry.append(line) #append the last line of entry
#print 'end of entry'
#now check the entry list as a whole
vrFlag = search_vr(entry) #see if the entry has a VR line
tag = get_tag(entry) #extract the tag from entry
print tag, vrFlag
if vrFlag == False and ("xxxx" not in tag):
vr = get_vr(vrDir, vrFilename, tag) #get vr value from the dict
entry1 = add_vr_to_entry(vr, entry)
write_entry(entry1, outFile) #write entry with added vr to outfile
entry = [] #clear entry list when finished
else:
entry2 = remove_sequential_blanks_in_entry(entry)
write_entry(entry2, outFile) #write unchanged entry to outfile
entry = []
elif copy:
entry.append(line)
##############################################################
if __name__ == "__main__":
main()