Skip to content

Commit

Permalink
Don't parse links in code blocks or comments
Browse files Browse the repository at this point in the history
  • Loading branch information
MartenBE committed Nov 8, 2024
1 parent 55b9e1f commit 715f0ab
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 56 deletions.
85 changes: 84 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ jsonschema = "^4.23.0"
natsort = "^8.4.0"
emoji = "^2.12.1"
livereload = "^2.7.0"
markdown = "^3.7"
beautifulsoup4 = "^4.12.3"
types-markdown = "^3.7.0.20240822"
types-beautifulsoup4 = "^4.12.0.20241020"

[tool.poetry.group.dev.dependencies]
bumpver = "^2023.1129"
Expand Down
34 changes: 0 additions & 34 deletions src/mkslides/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,12 @@

from jinja2 import Environment, FileSystemLoader, PackageLoader, select_autoescape

# Matches a Markdown link or image written as `[text](location)`.
# The location may not contain parentheses or angle brackets and is exposed
# through the named group `location`. Because only re.VERBOSE is set, `.` does
# not match newlines, so the bracketed text must sit on a single line.
MD_LINK_REGEX = re.compile(
r"""
\[.*?\] # Alt text
\((?P<location>[^()<>]*?)\) # Location without angle brackets
""",
re.VERBOSE,
)

# Matches a Markdown link or image whose location is wrapped in angle
# brackets: `[text](<location>)`. The text between the brackets (which may
# itself contain parentheses, but no `<` or `>`) is exposed through the named
# group `location`.
MD_ESCAPED_LINK_REGEX = re.compile(
r"""
\[.*?\] # Alt text
\(<(?P<location>[^<>]*?)>\) # Location with mandatory angle brackets
""",
re.VERBOSE,
)

# Matches an HTML `<img ...>` tag and exposes the value of its `src`
# attribute through the named group `location`. The value must be quoted, and
# the closing quote has to be the same character as the opening one (enforced
# by the `delimiter` backreference). Only re.VERBOSE is set, so `.` does not
# cross newlines and the whole tag must be on one line.
HTML_IMAGE_REGEX = re.compile(
r"""
<img # Start of the image
.+? # Any attributes
src= # src attribute
(?P<delimiter>['\"]) # Delimiter
(?P<location>.+?) # Image location
(?P=delimiter) # Delimiter
.*? # Any attributes
> # End of the image
""",
re.VERBOSE,
)

# Matches an HTML comment (`<!-- ... -->`) that contains a quoted
# `data-background-image=` attribute and exposes the attribute value through
# the named group `location`. The closing quote must equal the opening one
# (`delimiter` backreference). Only re.VERBOSE is set, so `.` does not cross
# newlines: the comment start, the attribute and the comment end must all be
# on the same line for a match.
HTML_BACKGROUND_IMAGE_REGEX = re.compile(
r"""
<!-- # Start of the comment
.*? # Any content
data-background-image= # data-background-image attribute
(?P<delimiter>['\"]) # Delimiter
(?P<location>.+?) # Image location
(?P=delimiter) # Delimiter
.*? # Any content
--> # End of the comment
""",
re.VERBOSE,
)
Expand Down
52 changes: 32 additions & 20 deletions src/mkslides/markupgenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from urllib.parse import urlparse

import frontmatter # type: ignore[import-untyped]
import markdown
from bs4 import BeautifulSoup, Comment
from emoji import emojize
from natsort import natsorted

Expand All @@ -18,10 +20,7 @@
DEFAULT_SLIDESHOW_TEMPLATE,
HIGHLIGHTJS_THEMES_RESOURCE,
HTML_BACKGROUND_IMAGE_REGEX,
HTML_IMAGE_REGEX,
LOCAL_JINJA2_ENVIRONMENT,
MD_ESCAPED_LINK_REGEX,
MD_LINK_REGEX,
REVEALJS_RESOURCE,
REVEALJS_THEMES_RESOURCE,
)
Expand Down Expand Up @@ -107,8 +106,8 @@ def __process_markdown_file(
# Retrieve the frontmatter metadata and the markdown content

content = md_file.read_text(encoding="utf-8-sig")
metadata, markdown = frontmatter.parse(content)
markdown = emojize(markdown, language="alias")
metadata, markdown_content = frontmatter.parse(content)
markdown_content = emojize(markdown_content, language="alias")

# Get the relative path of reveal.js

Expand Down Expand Up @@ -181,15 +180,15 @@ def __process_markdown_file(
highlight_theme=relative_highlight_theme_path,
revealjs_path=relative_revealjs_path,
markdown_data_options=markdown_data_options,
markdown=markdown,
markdown=markdown_content,
revealjs_config=revealjs_config,
plugins=plugins,
)
self.__create_file(output_markup_path, markup)

# Copy local files

self.__copy_local_files(md_file, md_root_path, markdown)
self.__copy_local_files(md_file, md_root_path, markdown_content)

return metadata, output_markup_path

Expand Down Expand Up @@ -250,20 +249,13 @@ def __copy_local_files(
self,
md_file: Path,
md_root_path: Path,
markdown: str,
markdown_content: str,
) -> None:
for regex in [
MD_LINK_REGEX,
MD_ESCAPED_LINK_REGEX,
HTML_IMAGE_REGEX,
HTML_BACKGROUND_IMAGE_REGEX,
]:
for m in regex.finditer(markdown):
location = m.group("location")

if self.__get_url_type(location) == URLType.RELATIVE:
image = Path(md_file.parent, location).resolve(strict=True)
self.__copy_to_output_relative_to_md_root(image, md_root_path)
links = self.__find_all_links(markdown_content)
for link in links:
if self.__get_url_type(link) == URLType.RELATIVE:
image = Path(md_file.parent, link).resolve(strict=True)
self.__copy_to_output_relative_to_md_root(image, md_root_path)

def __copy_theme(
self,
Expand Down Expand Up @@ -390,3 +382,23 @@ def __get_url_type(self, url: str) -> URLType:
return URLType.ABSOLUTE

return URLType.RELATIVE

def __find_all_links(self, markdown_content: str) -> set[str]:
    """Collect every link location referenced by a markdown document.

    The markdown is rendered to HTML and the resulting tree is inspected, so
    that links which only appear inside code (inline code spans or code
    blocks) are not reported.

    Args:
        markdown_content: The markdown source, which may contain raw HTML.

    Returns:
        The set of locations found in `<a href>` and `<img src>` elements
        outside of `<code>`/`<pre>`, plus the locations matched by
        `HTML_BACKGROUND_IMAGE_REGEX` inside HTML comments. The locations
        are returned as written; no filtering on URL type happens here.
    """
    # Enable the built-in fenced_code extension: without it a ``` fence is
    # only treated as code when it happens to parse as one inline backtick
    # span, so a fenced block containing a blank line would be rendered as
    # regular paragraphs and its links would wrongly be collected.
    html_content = markdown.markdown(
        markdown_content,
        extensions=["fenced_code"],
    )
    soup = BeautifulSoup(html_content, "html.parser")

    found_links: set[str] = set()

    # Anchors and images share the same rule: take the attribute value
    # unless the element is nested in a code element.
    for tag_name, attribute in (("a", "href"), ("img", "src")):
        for element in soup.find_all(tag_name, attrs={attribute: True}):
            if not element.find_parents(["code", "pre"]):
                found_links.add(element[attribute])

    # NOTE(review): a BeautifulSoup Comment holds only the text between
    # `<!--` and `-->`, so this lookup can only succeed if
    # HTML_BACKGROUND_IMAGE_REGEX does not require those delimiters -
    # confirm against the constant's current definition.
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        if match := HTML_BACKGROUND_IMAGE_REGEX.search(comment):
            found_links.add(match.group("location"))

    return found_links
35 changes: 34 additions & 1 deletion tests/test_files/someslides.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,39 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor i

## Don't try to parse anchor links

[Go to anchor](#anchor)
This should not throw an error:

[Go to anchor](#some-random-anchor)

---

## Don't try to parse links in code blocks

This should not throw an error:

`[test](./some-random-md-link)`
`![test](./some-random-md-image)`
`<a href="./some-random-html-link">test</a>`
`<img src="./some-random-html-image" />`

```markdown
[test](./some-random-md-link)
![test](./some-random-md-image)
```

```html
<a href="./some-random-html-link">test</a>
<img src="./some-random-html-image" />
```

[test](./some-random-md-link)
![test](./some-random-md-image)
<a href="./some-random-html-link">test</a>
<img src="./some-random-html-image" />

<!--
[test](./some-random-md-link)
![test](./some-random-md-image)
<a href="./some-random-html-link">test</a>
<img src="./some-random-html-image" />
-->

0 comments on commit 715f0ab

Please sign in to comment.