Skip to content

Commit

Permalink
Fix broken <br> handling outside of tags
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim Gromeyer committed Feb 13, 2025
1 parent adda3e5 commit d970806
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/html2md.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ void Converter::TagBreak::OnHasLeftOpeningTag(Converter *c) {
c->appendToMd(Repeat(" ", c->index_li));
} else if (c->is_in_table_) {
c->appendToMd("<br>");
} else if (!c->is_in_p_) {
} else if (!c->is_in_p_ && !c->prev_tag_.empty()) {
c->appendToMd("\n<br>\n\n");
} else if (c->md_.length() > 0)
c->appendToMd(" \n");
Expand Down
37 changes: 23 additions & 14 deletions tests/test_advanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,28 @@ def test_complex_formatting():
assert "1. Numbered one" in result
assert "2. Numbered two" in result

def test_line_breaks():
    """Check ``<br>`` conversion and the line-splitting options.

    The first part converts small HTML snippets containing ``<br>`` with
    ``pyhtml2md.convert`` and checks that the expected Markdown fragment
    appears in the output. The second part runs a ``Converter`` configured
    with ``splitLines`` enabled, ``softBreak = 20`` and ``hardBreak = 30``
    over one long line of text.
    """
    # (input HTML, Markdown fragment that must appear in the output)
    br_cases = (
        # <br> outside of any paragraph
        ("Text<br>Text2", "Text \nText2"),
        # <br> inside a paragraph
        ("<p>Line 1<br>Line 2</p>", "Line 1 \nLine 2"),
        # several <br> separating bullet-like lines inside one paragraph
        (
            "<p>Primary Colors:<br>• Red<br>• Blue<br>• Yellow</p>",
            "Primary Colors: \n• Red \n• Blue \n• Yellow",
        ),
    )
    for source, expected in br_cases:
        assert expected in pyhtml2md.convert(source)

    # Line splitting with explicit soft/hard break settings.
    long_text = "A very long line of text that should be wrapped according to the soft break and hard break settings"
    opts = pyhtml2md.Options()
    opts.splitLines = True
    opts.softBreak = 20
    opts.hardBreak = 30

    markdown = pyhtml2md.Converter(long_text, opts).convert()
    # NOTE: this passes as soon as a single output line is at most 30
    # characters long; it does not require every line to respect the limit.
    assert any(len(row) <= 30 for row in markdown.split('\n'))

def test_table_formatting():
html = """
<table>
Expand All @@ -68,21 +90,8 @@ def test_table_formatting():
result = converter.convert()

assert "|" in result
# assert "Header 1" in result # BUG: The generated table is wrong, don't uncomment this
assert "Data 1" in result

def test_line_breaks():
    """Run the converter with line splitting enabled on one long line.

    Uses ``splitLines = True``, ``softBreak = 20`` and ``hardBreak = 30`` and
    checks that at least one line of the output is no longer than 30
    characters.
    """
    wrap_options = pyhtml2md.Options()
    wrap_options.splitLines = True
    wrap_options.softBreak = 20
    wrap_options.hardBreak = 30

    text = "A very long line of text that should be wrapped according to the soft break and hard break settings"
    output = pyhtml2md.Converter(text, wrap_options).convert()

    # str.split always yields at least one piece, so min() is well defined;
    # "shortest line <= 30" is the same condition as "any line <= 30".
    line_lengths = [len(piece) for piece in output.split('\n')]
    assert min(line_lengths) <= 30

def test_error_handling():
# Test with malformed HTML
html = "<p>Unclosed paragraph"
Expand Down Expand Up @@ -134,4 +143,4 @@ def test_nested_structures():
assert "**list**" in result

if __name__ == "__main__":
pytest.main([__file__])
pytest.main([__file__])

0 comments on commit d970806

Please sign in to comment.