Skip to content

Commit

Permalink
Merge pull request #104 from openzim/table_of_content
Browse files Browse the repository at this point in the history
Rewrite libretexts.org Table of Content pages
  • Loading branch information
benoit74 authored Dec 6, 2024
2 parents d961cc2 + 139fa5d commit b1e3ebe
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 22 deletions.
37 changes: 37 additions & 0 deletions scraper/src/mindtouch2zim/libretexts/table_of_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from jinja2 import Template
from zimscraperlib.rewriting.html import HtmlRewriter

from mindtouch2zim.client import LibraryPage, MindtouchClient
from mindtouch2zim.libretexts.errors import BadBookPageError

"""
Logic here and in the Jinja2 template comes from
https://cdn.libretexts.net/github/LibreTextsMain/DynamicTOC/dist/dynamicTOC.min.js
That minified file is probably built from
https://github.com/LibreTexts/Libretext/blob/master/public/DynamicTOC/dynamicTOC.js and
https://github.com/LibreTexts/Libretext/blob/master/public/DynamicTOC/dynamicTOC.css
"""


def _render_html_from_data(jinja2_template: Template, cover_page: LibraryPage) -> str:
    """Render the table of content template for a given cover page.

    The cover page is exposed to the template under the `cover_page` variable.
    """
    template_variables = {"cover_page": cover_page}
    return jinja2_template.render(**template_variables)


def rewrite_table_of_content(
    rewriter: HtmlRewriter,
    jinja2_template: Template,
    mindtouch_client: MindtouchClient,
    page: LibraryPage,
) -> str:
    """Build the static libretexts.org table of content for `page`.

    The cover page is requested from `mindtouch_client`, rendered with
    `jinja2_template`, and the rendered HTML is passed through `rewriter`.

    Returns:
        The `content` of the rewriter result.

    Raises:
        BadBookPageError: when the client returns no cover page (None).
    """
    book_cover = mindtouch_client.get_cover_page(page)
    if book_cover is None:
        raise BadBookPageError()

    toc_html = _render_html_from_data(
        jinja2_template=jinja2_template, cover_page=book_cover
    )
    return rewriter.rewrite(toc_html).content
18 changes: 18 additions & 0 deletions scraper/src/mindtouch2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from mindtouch2zim.libretexts.detailed_licensing import rewrite_detailed_licensing
from mindtouch2zim.libretexts.glossary import rewrite_glossary
from mindtouch2zim.libretexts.index import rewrite_index
from mindtouch2zim.libretexts.table_of_content import rewrite_table_of_content
from mindtouch2zim.ui import (
ConfigModel,
PageContentModel,
Expand Down Expand Up @@ -255,6 +256,9 @@ def _run_internal(self) -> Path:
self.libretexts_detailed_licensing_template = self.jinja2_env.get_template(
"libretexts.detailed-licensing.html"
)
self.libretexts_table_of_content_template = self.jinja2_env.get_template(
"libretexts.table-of-content.html"
)

# Start creator early to detect problems early.
with creator as creator:
Expand Down Expand Up @@ -565,6 +569,20 @@ def _process_page(
mindtouch_client=self.mindtouch_client,
page=page,
)
elif (
"https://cdn.libretexts.net/github/LibreTextsMain/DynamicTOC/dist/dynamicTOC.min.js"
in page_content.html_body
):
logger.debug(
f"Rewriting {context.current_thread_workitem} as libretexts.org"
" table of content"
)
rewriten = rewrite_table_of_content(
rewriter=rewriter,
jinja2_template=self.libretexts_table_of_content_template,
mindtouch_client=self.mindtouch_client,
page=page,
)
except Exception as exc:
# code has been tested to work "in general", but many edge cases occur
# and since these pages are absolutely not essential, we just display a
Expand Down
111 changes: 111 additions & 0 deletions scraper/src/mindtouch2zim/templates/libretexts.table-of-content.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{#
  Static table of content for a libretexts.org book.

  Context: `cover_page`. Its `children` are iterated as chapters; each chapter
  and each chapter child is read for `title`, `encoded_url` and `children`.

  Rendering rules:
  - chapters titled "Front Matter" or "Back Matter" are not listed themselves;
    their children are listed at the top level with the `matter_entry` style,
    except children titled "TitlePage", "InfoPage" or "Table of Contents";
  - any other chapter is listed with the `chapter_entry` style when it has
    children (followed by a nested list of those children), and with the
    `matter_entry` style when it has none.

  NOTE(review): the print rules target the ids title, dynamicTOC_title_container
  and dynamicTOC_title_heading, none of which is emitted by this template.
  Confirm whether the surrounding page provides them or they are leftovers
  from the upstream stylesheet.
-#}
<style>
#dynamicTOC {
--toc-font-family: 'Lato', Arial, Helvetica, sans-serif;
}

#dynamicTOC_title_container {
display: none;
}

.toc li {
margin: 0.25em 0 0;
}

.chapter_level {
list-style: none;
}

.section_level {
list-style-type: circle;
}

.chapter_entry {
font-size: 1.5rem !important;
font-family: var(--toc-font-family) !important;
}

.content_entry {
font-size: 1.1rem !important;
font-family: var(--toc-font-family) !important;
}

.matter_entry {
font-size: 1.25rem !important;
font-family: var(--toc-font-family) !important;
}

@media print {
#title {
display: none;
}

#dynamicTOC_title_container {
display: flex;
background: #127bc4;
margin: 0 0 2%;
padding: 0;
width: 100%;
align-items: center;
}

#dynamicTOC_title_heading {
color: white !important;
font-size: 1.6em !important;
font-family: 'Lato', Arial, serif !important;
text-transform: uppercase;
font-weight: bold;
margin: 0 0 0 1% !important;
padding: 1% 0 !important;
letter-spacing: 0.05em !important;
}

.content_entry,
.chapter_entry,
.matter_entry {
font-family: var(--toc-font-family) !important;
}

.chapter_entry {
font-size: 1.25rem !important;
}

.content_entry {
font-size: 0.75rem !important;
}

.matter_entry {
font-size: 1rem !important;
margin-bottom: 0.5em !important;
}
}
</style>
<div id="dynamicTOC" aria-live="polite" aria-busy="false">
<ul class="toc chapter_level">
{% for chapter in cover_page.children %} {% if chapter.title in ["Front Matter", "Back Matter"]
%} {% for content in chapter.children %} {% if content.title not in ["TitlePage", "InfoPage",
"Table of Contents"] %}
<li>
<h2 class="matter_entry">
<a href="{{ content.encoded_url }}">{{ content.title }}</a>
</h2>
</li>
{% endif %} {% endfor %} {% else %}
<li>
<h2 class="{{ 'chapter_entry' if chapter.children else 'matter_entry' }}">
<a href="{{ chapter.encoded_url }}">{{ chapter.title }}</a>
</h2>
{% if chapter.children %}
<ul class="section_level">
{% for content in chapter.children %}
<li>
<span class="content_entry">
<a href="{{ content.encoded_url }}">{{ content.title }}</a>
</span>
</li>
{% endfor %}
</ul>
{% endif %}
</li>
{% endif %} {% endfor %}
</ul>
</div>
26 changes: 26 additions & 0 deletions scraper/tests-integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
# initialize a context since it is a requirement for most modules to load
Context.setup(**CONTEXT_DEFAULTS)

context = Context.get()

# import client late so that context is already initialized
from mindtouch2zim.client import MindtouchClient # noqa: E402


@pytest.fixture(scope="module")
def libretexts_slug() -> str:
Expand Down Expand Up @@ -66,3 +71,24 @@ def home_icons_urls() -> list[str]:
"https://a.mtstatic.com/@public/production/site_4038/1486479235-apple-touch-icon.png",
"https://a.mtstatic.com/@public/production/site_4038/1486479325-favicon.ico",
]


@pytest.fixture(scope="module")
def raw_client(libretexts_url: str, cache_folder: Path) -> MindtouchClient:
    """Create a MindtouchClient once the shared context targets the test library.

    The module-level context receives its library URL and cache folder from the
    corresponding fixtures before the client is instantiated.
    """
    context.library_url = libretexts_url
    context.cache_folder = cache_folder
    mindtouch_client = MindtouchClient()
    return mindtouch_client


@pytest.fixture(scope="module")
def client(
    raw_client: MindtouchClient,
    deki_token: str,  # noqa: ARG001
) -> MindtouchClient:
    """Client for which the deki token has already been fetched.

    `deki_token` is requested only so that fixture runs first; tests using this
    client do not have to fetch the token themselves.
    """
    return raw_client


@pytest.fixture(scope="module")
def deki_token(raw_client: MindtouchClient) -> str:
    """Deki token obtained from the raw client, fetched once per module."""
    token = raw_client.get_deki_token()
    return token
22 changes: 22 additions & 0 deletions scraper/tests-integration/libretexts/test_table_of_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pytest
from jinja2 import Environment, FileSystemLoader, select_autoescape

from mindtouch2zim.client import LibraryPage, MindtouchClient
from mindtouch2zim.constants import ROOT_DIR
from mindtouch2zim.libretexts.table_of_content import _render_html_from_data


@pytest.fixture(scope="module")
def page_data(client: MindtouchClient) -> LibraryPage:
    """Root of the page tree under the cover page resolved for page "15839"."""
    cover_page_id = client.get_cover_page_id("15839")
    assert cover_page_id
    page_tree = client.get_page_tree(cover_page_id)
    return page_tree.root


def test_render_table_of_content_template(page_data: LibraryPage):
    """Rendering the table of content template yields a non-empty result."""
    templates_dir = ROOT_DIR.joinpath("templates")
    jinja2_env = Environment(
        loader=FileSystemLoader(templates_dir),
        autoescape=select_autoescape(),
    )
    toc_template = jinja2_env.get_template("libretexts.table-of-content.html")
    rendered = _render_html_from_data(toc_template, page_data)
    assert rendered
22 changes: 0 additions & 22 deletions scraper/tests-integration/test_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import io
import re
from pathlib import Path

import pytest
from zimscraperlib.image.probing import format_for
Expand All @@ -17,32 +16,11 @@
context = Context.get()


@pytest.fixture(scope="module")
def raw_client(libretexts_url: str, cache_folder: Path) -> MindtouchClient:
context.library_url = libretexts_url
context.cache_folder = cache_folder
return MindtouchClient()


@pytest.fixture(scope="module")
def client(
raw_client: MindtouchClient,
deki_token: str, # noqa: ARG001
) -> MindtouchClient:
"""already authenticated client (avoid having to fetch deki_token in tests)"""
return raw_client


@pytest.fixture(scope="module")
def home(client: MindtouchClient) -> MindtouchHome:
    """Home data retrieved through the client, fetched once per module."""
    home_data = client.get_home()
    return home_data


@pytest.fixture(scope="module")
def deki_token(raw_client: MindtouchClient) -> str:
return raw_client.get_deki_token()


@pytest.fixture(scope="module")
def minimum_number_of_pages() -> int:
    """Minimum number of pages, as a module-scoped constant (8000)."""
    return 8_000
Expand Down

0 comments on commit b1e3ebe

Please sign in to comment.