-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomplete_xml_file
executable file
·59 lines (50 loc) · 1.9 KB
/
complete_xml_file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python
# Given a partial XML file (a prefix of one), this tells you which tags
# would be needed to close all open tags and writes them to the end of the
# XML file. If there are problems while reading the file (non-XML junk),
# it will point you to an editor so you can manually clean the file.
# This program was written to help recover XML files which had been
# corrupted in a hard drive failure. You should back up any XML files that
# you run it on unless you fully understand what this code does.
# Author: David McClosky
# Homepage: http://zorglish.org
# Code: http://github.com/dmcc
import xml.sax, sys
class XMLTagStack(xml.sax.ContentHandler):
    """SAX content handler that tracks which elements are currently open.

    Every start tag pushes its element name onto ``stack`` and every end
    tag pops it again, so after parsing a truncated document ``stack``
    lists the still-open elements from outermost to innermost.
    """

    def __init__(self):
        # Let the base handler set up its own state before adding ours.
        super().__init__()
        # Names of the currently open elements, outermost first.
        self.stack = []

    def startElement(self, name, attrs):
        """Record that element ``name`` has been opened (``attrs`` is unused)."""
        self.stack.append(name)

    def endElement(self, name):
        """Record that element ``name`` has been closed.

        The closed element must be the innermost open one; this is checked
        with an assertion as an internal sanity check.
        """
        assert name == self.stack[-1]
        self.stack.pop()

    def closing_tags(self):
        """Return the end tags for all open elements, innermost first.

        Each tag is followed by a newline; the result is an empty string
        when no element is open.
        """
        return "".join(f"</{tag}>\n" for tag in reversed(self.stack))
def check_or_repair(filename):
    """Append the closing tags that a truncated XML file is missing.

    The file is parsed with a SAX parser while an ``XMLTagStack`` tracks
    the open elements.  Three outcomes are possible:

    * The parser fails with anything other than 'no element found': the
      error and its line/column are printed together with a suggested
      ``vi`` command, and the file is left untouched.
    * Elements are still open when parsing stops: their end tags are
      appended to the file (the file is modified in place).
    * No element is open: "already complete" is printed.

    Args:
        filename: path of the XML file to check and, if needed, complete.

    Returns:
        None.  Results are reported on standard output.
    """
    tag_stack = XMLTagStack()
    parser = xml.sax.make_parser()
    parser.setContentHandler(tag_stack)
    try:
        # Read bytes rather than text so the parser does the decoding
        # itself; the with-block guarantees the handle is released even
        # when parsing fails.
        with open(filename, 'rb') as xml_file:
            parser.parse(xml_file)
    except xml.sax.SAXParseException as e:
        # 'no element found' is what the parser reports when the input
        # simply ends early -- the case this tool repairs.  Any other
        # message means the file needs manual cleaning first.
        if e.getMessage() != 'no element found':
            print("Error:", e)
            print('There is probably junk at', e.getLineNumber(), e.getColumnNumber(), end=' ')
            print("Remove it manually and run this again to complete the file.")
            print(f"vi {filename} +{e.getLineNumber()}")
            return

    if tag_stack.stack:
        print('stack:', tag_stack.stack)
        # Append the missing end tags, innermost element first.
        with open(filename, 'a') as f:
            f.write(tag_stack.closing_tags())
    else:
        print("already complete")
if __name__ == "__main__":
    # Treat every command-line argument as an XML file to check; a blank
    # line separates the reports of consecutive files.
    for path in sys.argv[1:]:
        print("Filename:", path)
        check_or_repair(path)
        print()