From 2e334092c42f15162791670aa1121bc643ad8a27 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 17 Jan 2024 11:23:48 +0100 Subject: [PATCH 1/5] Do not care if temporary directory already exists --- ChangeLog | 3 +++ src/gutenberg2zim/entrypoint.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5ccaf73..96b1130 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,9 @@ as of 2.0.0. ## [Unreleased] +### Fixed +- Do not fail if temporary directory already exists (#207) + ## [2.1.0] - 2023-08-18 ### Changed diff --git a/src/gutenberg2zim/entrypoint.py b/src/gutenberg2zim/entrypoint.py index eb111bd..7af99dc 100755 --- a/src/gutenberg2zim/entrypoint.py +++ b/src/gutenberg2zim/entrypoint.py @@ -111,7 +111,7 @@ def main(): logger.info("S3 Credentials OK. Continuing ... ") # create tmp dir - TMP_FOLDER_PATH.mkdir(parents=True) + TMP_FOLDER_PATH.mkdir(parents=True, exist_ok=True) languages = [ x.strip().lower() From ae230d01a6807fe77d5cab81572c0c325eba07d0 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 17 Jan 2024 11:44:55 +0100 Subject: [PATCH 2/5] Fix typo in `Scraper` metadata --- ChangeLog | 1 + src/gutenberg2zim/shared.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 96b1130..56afaf9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,7 @@ as of 2.0.0. 
### Fixed - Do not fail if temporary directory already exists (#207) +- Typo in `Scraper` ZIM metadata (#212) ## [2.1.0] - 2023-08-18 diff --git a/src/gutenberg2zim/shared.py b/src/gutenberg2zim/shared.py index 3f38fed..a26f1bb 100644 --- a/src/gutenberg2zim/shared.py +++ b/src/gutenberg2zim/shared.py @@ -44,7 +44,7 @@ def setup(filename, language, title, description, name): publisher="Kiwix", # type: ignore name=name, tags="_category:gutenberg;gutenberg", # type: ignore - scraper=f"gutengergtozim-{VERSION}", # type: ignore + scraper=f"gutenberg2zim-{VERSION}", # type: ignore date=date.today(), # type: ignore ).config_verbose(True) From 2cdc32e72f9fe829a0745a703c8a48e93c208eb0 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 17 Jan 2024 11:47:18 +0100 Subject: [PATCH 3/5] Allow to customize Publisher + harmonize default value --- ChangeLog | 6 ++++++ src/gutenberg2zim/entrypoint.py | 5 ++++- src/gutenberg2zim/shared.py | 4 ++-- src/gutenberg2zim/zim.py | 2 ++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 56afaf9..ae9bfaa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,12 @@ as of 2.0.0. 
## [Unreleased] +### Added +- `Publisher` ZIM metadata can now be customized at CLI (#210) + +### Changed +- `Publisher` ZIM metadata default value is changed to `openZIM` instead of `Kiwix` (#210) + ### Fixed - Do not fail if temporary directory already exists (#207) - Typo in `Scraper` ZIM metadata (#212) diff --git a/src/gutenberg2zim/entrypoint.py b/src/gutenberg2zim/entrypoint.py index 7af99dc..ebda3e8 100755 --- a/src/gutenberg2zim/entrypoint.py +++ b/src/gutenberg2zim/entrypoint.py @@ -22,7 +22,7 @@ """[--prepare] [--parse] [--download] [--export] [--dev] """ """[--zim] [--complete] [-m ONE_LANG_ONE_ZIM_FOLDER] """ """[--title-search] [--bookshelves] [--optimization-cache S3URL] """ - """[--stats-filename STATS_FILENAME]""" + """[--stats-filename STATS_FILENAME] [--publisher ZIM_PUBLISHER]""" """ -h --help Display this help message @@ -63,6 +63,7 @@ --use-any-optimized-version Try to use any optimized version found on """ """optimization cache --stats-filename= Path to store the progress JSON file to +--publisher= Custom Publisher in ZIM Metadata (openZIM otherwise) This script is used to produce a ZIM file (and any intermediate state) of Gutenberg repository using a mirror.""" @@ -102,6 +103,7 @@ def main(): optimization_cache = arguments.get("--optimization-cache") or None use_any_optimized_version = arguments.get("--use-any-optimized-version", False) stats_filename = arguments.get("--stats-filename") or None + publisher = arguments.get("--publisher") or "openZIM" s3_storage = None if optimization_cache: @@ -224,4 +226,5 @@ def f(x): title=zim_title, description=zim_desc, stats_filename=stats_filename, + publisher=publisher, ) diff --git a/src/gutenberg2zim/shared.py b/src/gutenberg2zim/shared.py index a26f1bb..d152ebb 100644 --- a/src/gutenberg2zim/shared.py +++ b/src/gutenberg2zim/shared.py @@ -32,7 +32,7 @@ def inc_progress(): Global.progress += 1 @staticmethod - def setup(filename, language, title, description, name): + def setup(filename, language, 
title, description, name, publisher): Global.creator = Creator( filename=filename, main_path="Home.html", @@ -41,7 +41,7 @@ def setup(filename, language, title, description, name): title=title, description=description, creator="gutenberg.org", # type: ignore - publisher="Kiwix", # type: ignore + publisher=publisher, # type: ignore name=name, tags="_category:gutenberg;gutenberg", # type: ignore scraper=f"gutenberg2zim-{VERSION}", # type: ignore diff --git a/src/gutenberg2zim/zim.py b/src/gutenberg2zim/zim.py index 1e163e7..d6817e7 100644 --- a/src/gutenberg2zim/zim.py +++ b/src/gutenberg2zim/zim.py @@ -28,6 +28,7 @@ def build_zimfile( title, description, stats_filename, + publisher, ): # actual list of languages with books sorted by most used nb = fn.COUNT(Book.language).alias("nb") @@ -76,6 +77,7 @@ def build_zimfile( title=title, description=description, name=project_id, + publisher=publisher, ) Global.start() From 847dd0db6666219813d8484c655988480e9473cc Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 17 Jan 2024 11:48:12 +0100 Subject: [PATCH 4/5] Adapt to hatchling v1.19.0 which mandates packages setting --- ChangeLog | 1 + pyproject.toml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index ae9bfaa..f4aa8cf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -17,6 +17,7 @@ as of 2.0.0. 
### Fixed - Do not fail if temporary directory already exists (#207) - Typo in `Scraper` ZIM metadata (#212) +- Adapt to hatchling v1.19.0 which mandates packages setting (#211) ## [2.1.0] - 2023-08-18 diff --git a/pyproject.toml b/pyproject.toml index 6cb351a..8d160f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,9 @@ exclude = ["/.github"] path = "hatch_build.py" dependencies = ["zimscraperlib==3.1.1"] +[tool.hatch.build.targets.wheel] +packages = ["src/gutenberg2zim"] + [tool.hatch.envs.default] features = ["dev"] From 727fc44d265c54f6848bb38aaee8c4cbd5ea86e1 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Wed, 17 Jan 2024 11:48:59 +0100 Subject: [PATCH 5/5] Force Python 3.11, zimscraperlib is not yet ready for 3.12 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d160f4..c873176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" name = "gutenberg2zim" authors = [{ name = "Kiwix", email = "dev@kiwix.org" }] keywords = ["kiwix", "zim", "offline", "gutenberg"] -requires-python = ">=3.11" +requires-python = ">=3.11,<3.12" description = "Make ZIM file from Gutenberg books" readme = "pypi-readme.rst" license = { text = "GPL-3.0-or-later" }