Skip to content
This repository has been archived by the owner on Mar 15, 2020. It is now read-only.

Commit

Permalink
Try to recover for wrong namespace usage in href attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
rupor-github committed Jun 3, 2017
1 parent 7e4b285 commit 7127eb1
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 7 deletions.
38 changes: 32 additions & 6 deletions modules/fb2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,8 @@ def parse_description(self, elem):
middlename = ''
firstname = ''

self.log.debug('Parsing desctiption')

for e in elem:
if ns_tag(e.tag) == 'document-info':
for t in e:
Expand Down Expand Up @@ -498,6 +500,10 @@ def parse_description(self, elem):
if ns_tag(a) == 'href':
self.book_cover = c.attrib[a][1:]
break
elif ':href' in a:
self.book_cover = c.attrib[a][1:]
self.log.warning('Wrong namespace is used for href attribute for cover page: {0}. Will attempt to recover...'.format(c.attrib))
break

elif ns_tag(t.tag) == 'genre':
self.genres.append(t.text)
Expand Down Expand Up @@ -549,6 +555,7 @@ def parse_description(self, elem):
def parse_binary(self, elem):
if elem.attrib['id'] and elem.attrib['content-type']:
have_file = False
self.log.debug('Parsing binary {0}'.format(elem.attrib))
id = elem.attrib['id']
decl_type = elem.attrib['content-type'].lower()
buff = base64.b64decode(elem.text.encode('ascii'))
Expand Down Expand Up @@ -580,7 +587,7 @@ def parse_binary(self, elem):
bg.save(full_name, dpi=img.info.get("dpi"))
have_file = True
except:
self.log.warning('Error while removing transparency for ref-id "{0}" in file "{1}"'.format(id, filename))
self.log.warning('Unable to remove transparency for ref-id "{0}" in file "{1}"'.format(id, filename))
self.log.debug('Getting details:', exc_info=True)

self.image_count += 1
Expand All @@ -593,7 +600,7 @@ def parse_binary(self, elem):
full_name = os.path.join(os.path.join(self.temp_content_dir, 'images'), filename)
self.image_count += 1
else:
self.log.warning('Error while processing binary for ref-id "{0}". Skipping...'.format(id))
self.log.error('Unable to process binary for ref-id "{0}". Skipping...'.format(id))
# self.log.debug('Getting details:', exc_info=True)
return

Expand Down Expand Up @@ -678,16 +685,20 @@ def parse_image(self, elem):
img_id = None
int_id = None
alt = None

for a in elem.attrib:
if ns_tag(a) == 'href':
int_id = elem.attrib[a][1:]
elif ':href' in a:
self.log.warning('Wrong namespace is used for href attribute in <image>: {0}. Will attempt to recover...'.format(elem.attrib))
int_id = elem.attrib[a][1:]
elif ns_tag(a) == 'id':
img_id = elem.attrib[a]
elif ns_tag(a) == 'alt':
alt = elem.attrib[a]

if not int_id:
self.log.warning('Unable to find image ref-id for "{0}". Something is very wrong, skipping...'.format(elem))
self.log.error('Unable to find image ref-id in "{0}" "{1}.'.format(elem.tag, elem.attrib))
return

filename = None
Expand All @@ -696,7 +707,7 @@ def parse_image(self, elem):
filename = file
break
if not filename:
self.log.warning('Unable to find image for ref-id "{0}". Something is very wrong...'.format(int_id))
self.log.error('Unable to find image for ref-id "{0}" in "{1}" "{2}.'.format(int_id, elem.tag, elem.attrib))
filename = "nonexistent.gif"
alt = int_id

Expand All @@ -719,7 +730,19 @@ def parse_image(self, elem):
self.parse_format(elem)

def parse_a(self, elem):
self.parse_format(elem, 'a', 'anchor', href=elem.attrib['{http://www.w3.org/1999/xlink}href'])
href = None
for name in elem.attrib:
if ns_tag(name) == 'href':
href = elem.attrib[name]
break
elif ':href' in name:
self.log.warning('Wrong namespace is used for href attribute in <a>: {0}. Will attempt to recover...'.format(elem.attrib))
href = elem.attrib[name]
break
if not href:
self.log.error('Unable to find href attribute in <a>: {0}.'.format(elem.attrib))

self.parse_format(elem, 'a', 'anchor', href=href)

def parse_p(self, elem):
ptag = 'p'
Expand Down Expand Up @@ -1020,6 +1043,9 @@ def insert_hyphenation(self, s):
return html.unescape(s) if not self.hyphenate or not self.hyphenator or self.header or self.subheader else self.hyphenator.hyphenate_text(html.unescape(s), self.replaceNBSP)

def parse_body(self, elem):

self.log.debug('Parsing body: {0}'.format(elem.attrib))

self.body_name = elem.attrib['name'] if 'name' in elem.attrib else ''
self.current_header_level = 0
self.first_header_in_body = True
Expand Down Expand Up @@ -1225,7 +1251,7 @@ def generate_cover(self):
filename = file
break
if not filename:
self.log.warning('Unable to find book cover image for ref-id "{0}". Something is very wrong...'.format(self.book_cover))
self.log.error('Unable to find book cover image for ref-id "{0}".'.format(self.book_cover))
self.book_cover = ''
return

Expand Down
2 changes: 1 addition & 1 deletion version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

WINDOWS = platform.system().lower() == "windows"

VERSION = u'3.6.25'
VERSION = u'3.6.26'

0 comments on commit 7127eb1

Please sign in to comment.