Skip to content

Commit

Permalink
Work around encoding error when parsing invalid ATP links
Browse files Browse the repository at this point in the history
  • Loading branch information
simonrob committed Jan 1, 2024
1 parent c399733 commit 99eb0d0
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion plugins/IMAPCleanO365ATPLinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,18 @@ def edit_message(self, byte_message):
except UnicodeDecodeError:
# urlparse assumes ascii encoding which is not always the case; try to recover if possible
atp_url_query = atp_url.replace(b'&amp;', b'&').rsplit(b'&data', 2)[0].partition(b'?')[2]
atp_url_parts = dict(urllib.parse.parse_qsl(atp_url_query))
try:
# parse_qsl not parse_qs because we only ever care about non-array values; extra dict formatting
# as IntelliJ has a bug incorrectly detecting parse_qs/l as returning a dict with byte-type keys
atp_url_parts = {str(key): value for key, value in urllib.parse.parse_qsl(atp_url_query)}
except UnicodeEncodeError:
# the encoding and errors parameters for parse_qsl are not actually passed to _encode_result, so invalid
# (or incorrectly hyperlinked) values can cause decoding errors - we temporarily patch as a workaround
# noinspection PyUnresolvedReferences,PyProtectedMember
original_encode_result = urllib.parse._encode_result
urllib.parse._encode_result = lambda obj, encoding='utf-8', err='replace': obj.encode(encoding, err)
atp_url_parts = {str(key): value for key, value in urllib.parse.parse_qsl(atp_url_query)}
urllib.parse._encode_result = original_encode_result
if b'url' in atp_url_parts:
edited_message += atp_url_parts[b'url']
link_count += 1
Expand Down

0 comments on commit 99eb0d0

Please sign in to comment.