From 3c702c5fd97381ca9b948bd7ca4316209517fc31 Mon Sep 17 00:00:00 2001 From: Luca Baffa Date: Fri, 15 Jul 2022 16:22:23 +0100 Subject: [PATCH 1/6] raise a ValueError if no data is fetched from database --- src/metadata.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/metadata.py b/src/metadata.py index 4875f25..37da904 100644 --- a/src/metadata.py +++ b/src/metadata.py @@ -69,6 +69,9 @@ def fetch_chapter_data(self) -> List[Chapter]: def get_chapters(self) -> List[Dict]: """Return a list of Chapters (dictionaries)""" + if not self.chapters: + raise ValueError("No chapter data retrieved from the database.") + return [chapter.to_dict() for chapter in self.chapters] @staticmethod From 2688fbb6fc7f321939851cede18f134a8d5afc9a Mon Sep 17 00:00:00 2001 From: Luca Baffa Date: Tue, 19 Jul 2022 09:24:39 +0100 Subject: [PATCH 2/6] raise exception if no book data fetched from crossref --- src/db.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/db.py b/src/db.py index da1a7dc..9bb52c6 100644 --- a/src/db.py +++ b/src/db.py @@ -15,6 +15,11 @@ def __init__(self, doi: str): def get_book(self) -> Dict: """Return the book data associated to the supplied ISBN""" query = self.works.doi(self.doi) + + if not query: + raise ValueError(f"No book data associated to the DOI {self.doi}" + "found on the database Crossref") + data = {"title": query.get("title")[0], "doi": query.get("DOI")} return data From a0be6827cc97f966b0ce829b59e747732c6fb834 Mon Sep 17 00:00:00 2001 From: Luca Baffa Date: Tue, 19 Jul 2022 09:27:00 +0100 Subject: [PATCH 3/6] improve exception message if no chapter data fetched from crossref --- src/db.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/db.py b/src/db.py index 9bb52c6..7f1e9f5 100644 --- a/src/db.py +++ b/src/db.py @@ -32,10 +32,9 @@ def get_chapters(self, book: Dict) -> List: 'title', 'type', 'page', 'publisher', 'abstract') - # Assert that at least one DOI have been discovered if not query: - raise 
AssertionError('Couldn\'t find any chapter-level DOIs' - + ' for the supplied --isbn value') + raise ValueError("No chapter data associated to the DOI" + f"{self.doi} found on the database Crossref") chapters = [] for chapter in query: From 4d8d129d4c1db97df5d9452b6995387f1db1885c Mon Sep 17 00:00:00 2001 From: Luca Baffa Date: Tue, 2 Aug 2022 14:47:43 +0100 Subject: [PATCH 4/6] split page ranges at either hyphen or en dash --- src/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index d564768..2545ce8 100644 --- a/src/main.py +++ b/src/main.py @@ -7,6 +7,7 @@ from pdf import Pdf from metadata import Metadata from shutil import copy2 +import re app = typer.Typer() @@ -28,7 +29,7 @@ def run(input_file: Path = typer.Option("./file.pdf", # Iterate over chapters metadata for chapter in metadata.get_chapters(): - page_range = chapter.get("pages").split('-') + page_range = re.split('-|–', chapter.get("pages")) output_file_name = chapter.get("doi").split('/')[-1] + '.pdf' # Merge PDFs From f72f01b3c47e1fced69090e4d9e80267af41efdb Mon Sep 17 00:00:00 2001 From: Luca Baffa Date: Wed, 17 Aug 2022 17:29:00 +0100 Subject: [PATCH 5/6] improve module design --- src/db.py | 59 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/db.py b/src/db.py index 7f1e9f5..c95daef 100644 --- a/src/db.py +++ b/src/db.py @@ -6,15 +6,34 @@ from typing import Dict, List -class Crossref(): - """Crossref compatibilty layer""" - def __init__(self, doi: str): - self.works = Works() +class Db(): + """Base Db class to derive specialised database classes from""" + def __init__(self, doi: str) -> None: + self.db = self.init_db() self.doi = urljoin('https://doi.org/', doi) + def init_db(self): + """Init database object""" + raise NotImplementedError + + def get_book(self): + """Return book data""" + raise NotImplementedError + + def get_chapters(self): + """Return chapters data""" + 
raise NotImplementedError + + +class Crossref(Db): + """Crossref compatibility layer""" + def init_db(self): + """Init database object""" + return Works() + def get_book(self) -> Dict: - """Return the book data associated to the supplied ISBN""" - query = self.works.doi(self.doi) + """Return book data""" + query = self.db.doi(self.doi) if not query: raise ValueError(f"No book data associated to the DOI {self.doi}" @@ -25,12 +44,12 @@ def get_book(self) -> Dict: return data def get_chapters(self, book: Dict) -> List: - """Returns a chapter data related to the book""" - query = self.works.filter(container_title=book.get("title"), - type='book-chapter') \ - .select('DOI', 'license', 'author', - 'title', 'type', 'page', - 'publisher', 'abstract') + """Return chapters data""" + query = self.db.filter(container_title=book.get("title"), + type='book-chapter') \ + .select('DOI', 'license', 'author', + 'title', 'type', 'page', + 'publisher', 'abstract') if not query: raise ValueError("No chapter data associated to the DOI" @@ -61,21 +80,23 @@ def join_author_names(self, chapter_data: Dict) -> str: return '; '.join(author_list) -class Thoth(): - """Thoth compatibilty layer""" - def __init__(self, doi: str): - self.thoth = ThothClient() - self.doi_url = urljoin('https://doi.org/', doi) +class Thoth(Db): + """Thoth compatibility layer""" + def init_db(self): + """Init database object""" + return ThothClient() def get_book(self) -> Dict: - work = self.thoth.work_by_doi(doi=self.doi_url, raw=True) + """Return book data""" + work = self.db.work_by_doi(doi=self.doi, raw=True) work_dict = json.loads(work)['data']['workByDoi'] data = {"title": work_dict.get("fullTitle"), - "doi": self.doi_url} + "doi": self.doi} return data def get_chapters(self, book: Dict) -> List: + """Return chapters data""" # TODO replace this with a Thoth library method when available url = 'https://api.thoth.pub/graphql' query = {"query": """{ workByDoi (doi: "%s") { From 
a351b6cfad78b2749cf72e41784bfaa4cf5005dd Mon Sep 17 00:00:00 2001 From: Luca Baffa Date: Wed, 17 Aug 2022 17:45:42 +0100 Subject: [PATCH 6/6] raise a ValueError if database name not recognized (misspelled or not implemented yet) --- src/metadata.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/metadata.py b/src/metadata.py index 37da904..1573425 100644 --- a/src/metadata.py +++ b/src/metadata.py @@ -52,8 +52,11 @@ class Metadata: def __init__(self, database="thoth", doi=None): if database == "thoth": self.db = Thoth(doi) - if database == "crossref": + elif database == "crossref": self.db = Crossref(doi) + else: + raise ValueError(f"Database '{database}' misspelled or not " + "implemented yet.") self.book = self.fetch_book_data() self.chapters = self.fetch_chapter_data()