From a597f3a7db0984d69e8696270ede38e0b1b93fd1 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Thu, 25 Jan 2024 18:13:00 +0100 Subject: [PATCH] fixup! Mutualize getting ArcWarcRecord content --- src/warc2zim/converter.py | 6 +++--- src/warc2zim/items.py | 4 ++-- src/warc2zim/utils.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/warc2zim/converter.py b/src/warc2zim/converter.py index b92e1781..f3c511c6 100644 --- a/src/warc2zim/converter.py +++ b/src/warc2zim/converter.py @@ -47,7 +47,7 @@ from warc2zim.items import StaticArticle, WARCPayloadItem from warc2zim.url_rewriting import FUZZY_RULES, normalize from warc2zim.utils import ( - get_arc_warc_record_content, + get_record_content, get_record_mime_type, get_record_url, get_version, @@ -327,7 +327,7 @@ def gather_information_from_warc(self): main_page_found = True continue - content = get_arc_warc_record_content(record) + content = get_record_content(record) if not self.title: self.title = parse_title(content) @@ -417,7 +417,7 @@ def retrieve_illustration(self): "Illustration_48x48_at_1" ] return - self.illustration = get_arc_warc_record_content(record) + self.illustration = get_record_content(record) # favicon_url not in WARC ; downloading try: diff --git a/src/warc2zim/items.py b/src/warc2zim/items.py index 7897b548..c556ef63 100644 --- a/src/warc2zim/items.py +++ b/src/warc2zim/items.py @@ -20,7 +20,7 @@ from warc2zim.content_rewriting.js import JsRewriter from warc2zim.url_rewriting import ArticleUrlRewriter from warc2zim.utils import ( - get_arc_warc_record_content, + get_record_content, get_record_mime_type, get_record_url, ) @@ -44,7 +44,7 @@ def __init__( self.path = path self.mimetype = get_record_mime_type(record) self.title = "" - self.content = get_arc_warc_record_content(record) + self.content = get_record_content(record) if getattr(record, "method", "GET") == "POST": return diff --git a/src/warc2zim/utils.py b/src/warc2zim/utils.py index 8fe09d5d..a4aa9d13 100644 --- a/src/warc2zim/utils.py +++ b/src/warc2zim/utils.py @@ -52,7 +52,7 @@ def to_string(input_: str | bytes) -> str: return input_ # pyright: ignore[reportGeneralTypeIssues, reportReturnType] -def get_arc_warc_record_content(record: ArcWarcRecord): +def get_record_content(record: ArcWarcRecord): if hasattr(record, "buffered_stream"): stream = ( record.buffered_stream # pyright: ignore [reportGeneralTypeIssues, reportAttributeAccessIssue]