Skip to content

Commit

Permalink
Merge branch 'release/0.0.6'
Browse files Browse the repository at this point in the history
  • Loading branch information
lb803 committed Oct 12, 2022
2 parents 93d18f5 + a351b6c commit cc782f6
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 25 deletions.
71 changes: 48 additions & 23 deletions src/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,54 @@
from typing import Dict, List


class Crossref():
"""Crossref compatibilty layer"""
def __init__(self, doi: str):
self.works = Works()
class Db():
    """Base Db class to derive specialised database classes from.

    A subclass provides the concrete client by overriding ``init_db`` and
    implements ``get_book`` / ``get_chapters`` on top of it. Every hook
    defined here raises ``NotImplementedError``, so this class cannot be
    instantiated directly.
    """

    def __init__(self, doi: str) -> None:
        """Build the database client and store the DOI as a doi.org URL.

        Args:
            doi: DOI joined onto ``https://doi.org/`` with ``urljoin``.

        Raises:
            NotImplementedError: if ``init_db`` has not been overridden.
        """
        # The client is created first, so an un-overridden init_db fails
        # before any other attribute is set.
        self.db = self.init_db()
        self.doi = urljoin('https://doi.org/', doi)

    def init_db(self):
        """Init database object"""
        raise NotImplementedError

    def get_book(self) -> Dict:
        """Return book data"""
        raise NotImplementedError

    def get_chapters(self, book: "Dict | None" = None) -> List:
        """Return chapters data

        Args:
            book: book data passed by callers to the subclass overrides,
                which declare ``get_chapters(self, book)``. Accepted here so
                the base signature matches them; it defaults to ``None`` so
                a call without arguments behaves as before.
        """
        raise NotImplementedError


class Crossref(Db):
"""Crossref compatibility layer"""
def init_db(self):
"""Init database object"""
return Works()

def get_book(self) -> Dict:
"""Return the book data associated to the supplied ISBN"""
query = self.works.doi(self.doi)
"""Return book data"""
query = self.db.doi(self.doi)

if not query:
raise ValueError(f"No book data associated to the DOI {self.doi}"
"found on the database Crossref")

data = {"title": query.get("title")[0],
"doi": query.get("DOI")}
return data

def get_chapters(self, book: Dict) -> List:
"""Returns a chapter data related to the book"""
query = self.works.filter(container_title=book.get("title"),
type='book-chapter') \
.select('DOI', 'license', 'author',
'title', 'type', 'page',
'publisher', 'abstract')

# Assert that at least one DOI have been discovered
"""Return chapters data"""
query = self.db.filter(container_title=book.get("title"),
type='book-chapter') \
.select('DOI', 'license', 'author',
'title', 'type', 'page',
'publisher', 'abstract')

if not query:
raise AssertionError('Couldn\'t find any chapter-level DOIs'
+ ' for the supplied --isbn value')
raise ValueError("No chapter data associated to the DOI"
f"{self.doi} found on the database Crossref")

chapters = []
for chapter in query:
Expand All @@ -57,21 +80,23 @@ def join_author_names(self, chapter_data: Dict) -> str:
return '; '.join(author_list)


class Thoth():
"""Thoth compatibilty layer"""
def __init__(self, doi: str):
self.thoth = ThothClient()
self.doi_url = urljoin('https://doi.org/', doi)
class Thoth(Db):
"""Thoth compatibility layer"""
def init_db(self):
"""Init database object"""
return ThothClient()

def get_book(self) -> Dict:
work = self.thoth.work_by_doi(doi=self.doi_url, raw=True)
"""Return book data"""
work = self.db.work_by_doi(doi=self.doi, raw=True)
work_dict = json.loads(work)['data']['workByDoi']

data = {"title": work_dict.get("fullTitle"),
"doi": self.doi_url}
"doi": self.doi}
return data

def get_chapters(self, book: Dict) -> List:
"""Return chapters data"""
# TODO replace this with a Thoth library method when available
url = 'https://api.thoth.pub/graphql'
query = {"query": """{ workByDoi (doi: "%s") {
Expand Down
3 changes: 2 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pdf import Pdf
from metadata import Metadata
from shutil import copy2
import re

app = typer.Typer()

Expand All @@ -28,7 +29,7 @@ def run(input_file: Path = typer.Option("./file.pdf",

# Iterate over chapters metadata
for chapter in metadata.get_chapters():
page_range = chapter.get("pages").split('-')
page_range = re.split('-|–', chapter.get("pages"))
output_file_name = chapter.get("doi").split('/')[-1] + '.pdf'

# Merge PDFs
Expand Down
8 changes: 7 additions & 1 deletion src/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@ class Metadata:
def __init__(self, database="thoth", doi=None):
if database == "thoth":
self.db = Thoth(doi)
if database == "crossref":
elif database == "crossref":
self.db = Crossref(doi)
else:
raise ValueError(f"Database '{database}' misspelled or not "
"implemented yet.")

self.book = self.fetch_book_data()
self.chapters = self.fetch_chapter_data()
Expand All @@ -69,6 +72,9 @@ def fetch_chapter_data(self) -> List[Chapter]:

def get_chapters(self) -> List[Dict]:
"""Return a list of Chapters (dictionaries)"""
if not self.chapters:
raise ValueError("No chapter data retrieved from the database.")

return [chapter.to_dict() for chapter in self.chapters]

@staticmethod
Expand Down

0 comments on commit cc782f6

Please sign in to comment.