From db8c7ac94daccb4f7846d8b57e4144da0709cc5d Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 8 Feb 2024 11:40:13 +0100 Subject: [PATCH] Add logging in case of invalid CSS. --- src/warc2zim/content_rewriting/css.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/warc2zim/content_rewriting/css.py b/src/warc2zim/content_rewriting/css.py index ec2c3728..0b0eb9b5 100644 --- a/src/warc2zim/content_rewriting/css.py +++ b/src/warc2zim/content_rewriting/css.py @@ -10,6 +10,7 @@ ) from tinycss2.serializer import serialize_url +from warc2zim.constants import logger from warc2zim.content_rewriting import UrlRewriterProto from warc2zim.content_rewriting.rx_replacer import RxRewriter @@ -51,6 +52,13 @@ def rewrite(self, content: str | bytes) -> str: # If tinycss fail to parse css, it will generate a "Error" token. # Exception is raised at serialization time. # We try/catch the whole process to be sure anyway. + logger.warning( + ( + "Css transformation fails. Fallback to regex rewriter.\n" + "Article path is %s" + ), + self.url_rewriter.article_url, + ) return self.fallback_rewriter.rewrite_content(content, {}) return output @@ -64,6 +72,13 @@ def rewrite_inline(self, content: str) -> str: # If tinycss fail to parse css, it will generate a "Error" token. # Exception is raised at serialization time. # We try/catch the whole process to be sure anyway. + logger.warning( + ( + "Css transformation fails. Fallback to regex rewriter.\n" + "Content is `%s`" + ), + content, + ) return self.fallback_rewriter.rewrite_content(content, {}) def process_list(self, components: Iterable[ast.Node]):