Skip to content

Commit

Permalink
Merge pull request #434 from openzim/upgrade_deps
Browse files Browse the repository at this point in the history
Upgrade dependencies - Python 3.13
  • Loading branch information
benoit74 authored Feb 3, 2025
2 parents eeeb554 + cd3251b commit acc8b06
Show file tree
Hide file tree
Showing 13 changed files with 45 additions and 50 deletions.
22 changes: 4 additions & 18 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/psf/black
rev: "24.10.0"
rev: "25.1.0"
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.4
rev: v0.9.4
hooks:
- id: ruff
- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.391
rev: v1.1.393
hooks:
- id: pyright
name: pyright (system)
Expand All @@ -25,17 +25,3 @@ repos:
'types_or': [python, pyi]
require_serial: true
minimum_pre_commit_version: '2.9.2'
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
hooks:
- id: prettier
files: javascript\/.*$ # files in javascript folder
args:
- --config
- javascript/.prettierrc.json
- repo: https://github.com/pre-commit/mirrors-eslint
rev: v8.51.0
hooks:
- id: eslint
types: [file]
files: javascript\/src\/.*(?:\.[jt]sx?|\.vue)$ # *.js, *.jsx, *.ts, *.tsx, *.vue in javascript/src folder
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Upgrade dependencies: Python 3.13, zimscraperlib 5.1.0 and others (#434)

## [2.2.0] - 2024-01-10

### Changed
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.12-slim-bookworm
FROM python:3.13-slim-bookworm
LABEL org.opencontainers.image.source=https://github.com/openzim/warc2zim

RUN apt-get update -y \
Expand Down
3 changes: 2 additions & 1 deletion contrib/cleanup_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import sys
from pathlib import Path

from zimscraperlib.rewriting.url_rewriting import ZimPath

from warc2zim.constants import logger
from warc2zim.url_rewriting import ZimPath


def notify(_: ZimPath):
Expand Down
11 changes: 7 additions & 4 deletions contrib/html_convert.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" html rewrite test utility
"""html rewrite test utility
This utility takes a given HTML content as input, base64 encoded, its original URL, and
rewrites its content.
Expand All @@ -17,9 +17,10 @@
import sys
from pathlib import Path

from zimscraperlib.rewriting.html import HtmlRewriter
from zimscraperlib.rewriting.url_rewriting import ArticleUrlRewriter, HttpUrl, ZimPath

from warc2zim.constants import logger
from warc2zim.content_rewriting.html import HtmlRewriter
from warc2zim.url_rewriting import ArticleUrlRewriter, HttpUrl, ZimPath
from warc2zim.utils import to_string


Expand All @@ -37,7 +38,9 @@ def main(path_to_content: str, article_url: str, encoding: str | None = None):
content = Path(path_to_content)

url_rewriter = ArticleUrlRewriter(
HttpUrl(article_url), existing_zim_paths=set(), missing_zim_paths=set()
article_url=HttpUrl(article_url),
existing_zim_paths=set(),
missing_zim_paths=set(),
)

html_rewriter = HtmlRewriter(url_rewriter, "", None, notify)
Expand Down
6 changes: 3 additions & 3 deletions contrib/marxists.org.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" MIA English exclude list
"""MIA English exclude list
This utility computes the list of all subpages/languages that must be ignored for the
English ZIM of The Marxists Internet Archive (MIA) at www.marxists.org.
Expand All @@ -23,9 +23,9 @@
subfolders = set()
REGEX = re.compile(r"\.\.\/(?P<subfolder>.*?)\/")
for anchor in soup.find_all("a"):
if not anchor.has_attr("href"):
if not anchor.has_attr("href"): # pyright: ignore
continue
if match := REGEX.match(anchor["href"]):
if match := REGEX.match(anchor["href"]): # pyright: ignore
subfolders.add(match.group("subfolder"))

print("|".join(sorted(subfolders))) # noqa: T201
28 changes: 14 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
[build-system]
requires = ["hatchling", "hatch-openzim==0.2.1"]
requires = ["hatchling", "hatch-openzim"]
build-backend = "hatchling.build"

[project]
name = "warc2zim"
requires-python = ">=3.12,<3.13"
requires-python = ">=3.13,<3.14"
description = "Convert WARC to ZIM"
readme = "README.md"
dependencies = [
"warcio==1.7.5",
"requests==2.32.3",
"zimscraperlib==5.0.0rc3",
"jinja2==3.1.4", # also update version in build-system above and in build_js.sh
"zimscraperlib==5.1.0",
"jinja2==3.1.5",
# to support possible brotli content in warcs, must be added separately
"brotlipy==0.7.0",
"cdxj_indexer==1.4.6",
"tinycss2==1.4.0",
"beautifulsoup4==4.12.3", # used to parse base href
"beautifulsoup4==4.13.1", # used to parse base href
"lxml==5.3.0", # used by beautifulsoup4 for parsing html
"python-dateutil==2.9.0.post0",
]
Expand All @@ -35,19 +35,19 @@ scripts = [
"invoke==2.2.0",
]
lint = [
"black==24.10.0",
"ruff==0.8.4",
"black==25.1.0",
"ruff==0.9.4",
]
check = [
"pyright==1.1.391",
"pyright==1.1.393",
]
test = [
"pytest==8.3.4",
"coverage==7.6.9",
"coverage==7.6.10",
]
dev = [
"pre-commit==4.0.1",
"debugpy==1.8.11",
"pre-commit==4.1.0",
"debugpy==1.8.12",
"warc2zim[scripts]",
"warc2zim[lint]",
"warc2zim[test]",
Expand Down Expand Up @@ -103,10 +103,10 @@ all = "inv checkall --args '{args}'"

[tool.black]
line-length = 88
target-version = ['py312']
target-version = ['py313']

[tool.ruff]
target-version = "py312"
target-version = "py313"
line-length = 88
src = ["src"]

Expand Down Expand Up @@ -229,6 +229,6 @@ exclude_lines = [
include = ["src", "tests", "tasks.py"]
exclude = [".env/**", ".venv/**"]
extraPaths = ["src"]
pythonVersion = "3.12"
pythonVersion = "3.13"
typeCheckingMode = "basic"
disableBytesTypePromotions = true
2 changes: 1 addition & 1 deletion src/warc2zim/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from zimscraperlib import getLogger
from zimscraperlib.logging import getLogger

# Shared logger with default log level at this stage
logger = getLogger("warc2zim")
8 changes: 4 additions & 4 deletions src/warc2zim/converter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 nu

""" warc2zim conversion utility
"""warc2zim conversion utility
This utility provides a conversion from WARC records to ZIM files.
WARC records are directly stored in a zim file as:
Expand Down Expand Up @@ -739,7 +739,7 @@ def find_icon_and_language(self, record, content):
lang_elem = soup.find("html", attrs={"lang": True})
if lang_elem:
self.language = parse_language(
lang_elem.attrs[ # pyright: ignore[reportGeneralTypeIssues ,reportAttributeAccessIssue]
lang_elem.attrs[ # pyright: ignore[reportArgumentType, reportAttributeAccessIssue]
"lang"
]
)
Expand All @@ -751,7 +751,7 @@ def find_icon_and_language(self, record, content):
)
if lang_elem:
self.language = parse_language(
lang_elem.attrs[ # pyright: ignore[reportGeneralTypeIssues ,reportAttributeAccessIssue]
lang_elem.attrs[ # pyright: ignore[reportArgumentType ,reportAttributeAccessIssue]
"content"
]
)
Expand All @@ -761,7 +761,7 @@ def find_icon_and_language(self, record, content):
lang_elem = soup.find("meta", {"name": "language", "content": True})
if lang_elem:
self.language = parse_language(
lang_elem.attrs[ # pyright: ignore[reportGeneralTypeIssues ,reportAttributeAccessIssue]
lang_elem.attrs[ # pyright: ignore[reportArgumentType ,reportAttributeAccessIssue]
"content"
]
)
Expand Down
2 changes: 1 addition & 1 deletion src/warc2zim/items.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# vim: ai ts=4 sts=4 et sw=4 nu

""" warc2zim's item classes
"""warc2zim's item classes
This module contains the different Item classes we may want to add to a Zim archive.
"""
Expand Down
2 changes: 1 addition & 1 deletion src/warc2zim/rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


def no_title(
function: Callable[..., str | bytes]
function: Callable[..., str | bytes],
) -> Callable[..., tuple[str, str | bytes]]:
"""Decorator for methods transforming content without extracting a title.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ class CharsetsTestData:
expected_strings: list[str]


def get_testdata() -> Generator[CharsetsTestData, None, None]:
def get_testdata() -> Generator[CharsetsTestData]:
data = json.loads(
(Path(__file__).parent / "encodings" / "definition.json").read_bytes()
)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_warc_to_zim.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

import pytest
import requests
from zimscraperlib.image.conversion import convert_image, convert_svg2png, resize_image
from zimscraperlib.image.conversion import convert_image, convert_svg2png
from zimscraperlib.image.probing import format_for
from zimscraperlib.image.transformation import resize_image
from zimscraperlib.zim import Archive

from warc2zim.__about__ import __version__
Expand Down

0 comments on commit acc8b06

Please sign in to comment.