Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 95 additions & 8 deletions src/Converter/HtmlToDjot.php
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,10 @@ protected function extractRoundTripSource(DOMElement $node, string $tagName): ?s

protected function processLink(DOMElement $node): string
{
if ($this->linkRequiresRawHtmlFallback($node)) {
return $this->processRawHtmlInlineElement($node);
}

if ($node->hasAttribute('data-djot-heading-ref')) {
$target = $node->getAttribute('data-djot-heading-ref');
$displayText = $node->getAttribute('data-djot-heading-ref-display');
Expand Down Expand Up @@ -934,6 +938,8 @@ protected function processLink(DOMElement $node): string
$text = $href;
}

$text = $this->escapeLinkOrImageLabel($text);

// Check for @mention (round-trip support for MentionsExtension)
if ($node->hasAttribute('data-username')) {
$username = $node->getAttribute('data-username');
Expand Down Expand Up @@ -993,9 +999,15 @@ protected function processLink(DOMElement $node): string
protected function processImage(DOMElement $node): string
{
$src = $node->getAttribute('src');
$alt = $node->getAttribute('alt');
$rawAlt = $node->getAttribute('alt');
$title = $node->getAttribute('title');

if ($this->requiresRawImageFallback($rawAlt)) {
return $this->processRawHtmlInlineElement($node);
}

$alt = $this->escapeLinkOrImageLabel($rawAlt);

// Check for reference image (round-trip support)
if ($node->hasAttribute('data-djot-ref')) {
$refLabel = $node->getAttribute('data-djot-ref');
Expand Down Expand Up @@ -1413,7 +1425,7 @@ protected function processTable(DOMElement $node): string
$tag = strtolower($cell->tagName);
if ($tag === 'th' || $tag === 'td') {
// Get cell content with cell attributes
$cellContent = trim($this->processChildren($cell));
$cellContent = $this->serializeTableCellContent($cell);
$cellAttrs = $this->getElementAttributes($cell);
if ($cellAttrs !== '') {
// Cell attributes go after opening pipe: |{.class} content |
Expand Down Expand Up @@ -1524,6 +1536,24 @@ protected function getDirectTableRows(DOMElement $table): array
return $rows;
}

/**
 * Render the content of a table cell as one line of text.
 *
 * A cell with at least one direct child element whose lowercased tag name is
 * listed in $this->blockElements is rendered with processBlock(); every other
 * cell is rendered with processChildren(). The rendered text is trimmed and
 * each run of whitespace (newlines included) is collapsed to a single space.
 *
 * @param DOMElement $cell The cell element to render.
 *
 * @return string The trimmed, whitespace-collapsed cell text.
 */
protected function serializeTableCellContent(DOMElement $cell): string
{
    $containsBlock = false;
    foreach ($cell->childNodes as $childNode) {
        if (!($childNode instanceof DOMElement)) {
            continue;
        }

        if (in_array(strtolower($childNode->tagName), $this->blockElements, true)) {
            $containsBlock = true;

            break;
        }
    }

    if ($containsBlock) {
        $rendered = $this->processBlock($cell);
    } else {
        $rendered = $this->processChildren($cell);
    }
    $trimmed = trim($rendered);

    // preg_replace() returns null on a PCRE error; keep the trimmed text in that case.
    $singleLine = preg_replace('/\s+/', ' ', $trimmed);

    return $singleLine ?? $trimmed;
}

protected function findFirstDirectChildByTagName(DOMElement $node, string $tagName): ?DOMElement
{
$tagName = strtolower($tagName);
Expand Down Expand Up @@ -1697,6 +1727,33 @@ protected function processRawInline(DOMElement $node): string
return $backticks . $content . $backticks . '{=' . $format . '}';
}

/**
 * Serialize an element back to HTML and wrap it as a Djot raw inline
 * (a backtick-fenced span followed by `{=html}`).
 *
 * @param DOMElement $node The element to emit verbatim.
 *
 * @return string The raw HTML inline, or an empty string when the element
 *                cannot be serialized.
 */
protected function processRawHtmlInlineElement(DOMElement $node): string
{
    // The nullsafe call yields null for a node without an owner document, and
    // saveHTML() yields false on failure.
    $html = $node->ownerDocument?->saveHTML($node);

    // With nothing to wrap, the opening and closing fences would sit directly
    // next to each other and no longer delimit a raw inline, so emit nothing.
    if (!is_string($html) || $html === '') {
        return '';
    }

    // NOTE(review): presumably returns a backtick run of at least the given
    // length that does not occur inside $html; confirm against StringUtil.
    $backticks = StringUtil::findSafeCodeFence($html, 1);

    return $backticks . $html . $backticks . '{=html}';
}

/**
 * Decide whether a link has to be emitted as raw HTML because it contains an
 * image whose alt text requires the raw image fallback.
 *
 * The whole subtree is inspected, not only direct children, so an image
 * wrapped in another inline element (e.g. <a><span><img></span></a>) is
 * detected as well.
 *
 * @param DOMElement $node The link element, or a descendant while recursing.
 *
 * @return bool True when any descendant <img> has an alt attribute for which
 *              requiresRawImageFallback() returns true.
 */
protected function linkRequiresRawHtmlFallback(DOMElement $node): bool
{
    foreach ($node->childNodes as $child) {
        if (!($child instanceof DOMElement)) {
            continue;
        }

        if (strtolower($child->tagName) === 'img') {
            if ($this->requiresRawImageFallback($child->getAttribute('alt'))) {
                return true;
            }

            // <img> is a void element, so there is nothing to descend into.
            continue;
        }

        if ($this->linkRequiresRawHtmlFallback($child)) {
            return true;
        }
    }

    return false;
}

/**
* Process semantic HTML elements to Djot span syntax
*
Expand Down Expand Up @@ -2094,7 +2151,7 @@ protected function processFootnoteContent(DOMElement $li): string
}

// Process the remaining content
$content = trim($this->processChildren($clone));
$content = trim($this->processBlock($clone));

return $content;
}
Expand All @@ -2107,22 +2164,34 @@ protected function formatFootnoteDefinition(string|int $label, string $content):
$formatted = '[^' . $label . ']: ' . $firstLine;

foreach ($lines as $line) {
$formatted .= "\n";
if ($line !== '') {
$formatted .= ' ' . $line;
}
$formatted .= "\n " . $line;
}

return $formatted;
}

/**
 * Backslash-escape the characters `\`, `[` and `]` in a link or image label.
 *
 * @param string $text The unescaped label text.
 *
 * @return string The label with each of the three characters prefixed by a backslash.
 */
protected function escapeLinkOrImageLabel(string $text): string
{
    // Backslashes go first so the ones added for the brackets are not doubled.
    $escaped = str_replace('\\', '\\\\', $text);
    $escaped = str_replace('[', '\[', $escaped);

    return str_replace(']', '\]', $escaped);
}

/**
 * Tell whether an image alt text contains `[`, `]` or a backslash.
 *
 * @param string $alt The raw alt attribute value.
 *
 * @return bool True when at least one of the three characters is present.
 */
protected function requiresRawImageFallback(string $alt): bool
{
    // strpbrk() returns the remainder starting at the first match, or false when none exists.
    $fromFirstSpecial = strpbrk($alt, '[]\\');

    return is_string($fromFirstSpecial);
}

protected function cleanup(string $djot): string
{
// Remove leading whitespace from lines (except in code blocks and indented content)
$lines = explode("\n", $djot);
$inCodeBlock = false;
$inDefinitionList = false;
$inList = false;
$inFootnote = false;
$result = [];

foreach ($lines as $line) {
Expand All @@ -2140,10 +2209,20 @@ protected function cleanup(string $djot): string
continue;
}

if (preg_match('/^\[\^[^\]]+\]:\s*/', $line) === 1) {
$result[] = $line;
$inDefinitionList = false;
$inList = false;
$inFootnote = true;

continue;
}

// Track definition lists (`: term` starts one)
if (str_starts_with($line, ': ')) {
$inDefinitionList = true;
$inList = false;
$inFootnote = false;
$result[] = $line;

continue;
Expand All @@ -2154,6 +2233,7 @@ protected function cleanup(string $djot): string
$result[] = $line;
$inDefinitionList = false;
$inList = true;
$inFootnote = false;

continue;
}
Expand Down Expand Up @@ -2186,9 +2266,15 @@ protected function cleanup(string $djot): string
continue;
}

if ($inFootnote && preg_match('/^\s{2,}\S/', $line)) {
$result[] = $line;

continue;
}

// Blank line (or whitespace-only line) ends definition list context but not list context
if (trim($line) === '') {
$result[] = ''; // Normalize to empty string
$result[] = $inFootnote ? ' ' : ''; // Normalize to empty string unless footnote continuation needs indentation

continue;
}
Expand All @@ -2197,6 +2283,7 @@ protected function cleanup(string $djot): string
$result[] = ltrim($line);
$inDefinitionList = false;
$inList = false;
$inFootnote = false;
}

$djot = implode("\n", $result);
Expand Down
77 changes: 77 additions & 0 deletions tests/TestCase/Converter/HtmlToDjotTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,24 @@ public function testLinkWithQuotedTitleEscapesDjotTitle(): void
$this->assertSame("[Example](https://example.com \"a \\\"quote\\\" here\")\n", $result);
}

/**
 * A `]` inside link text is backslash-escaped, and the link survives a round trip.
 */
public function testLinkEscapesClosingBracketInLabel(): void
{
    $html = '<a href="https://example.com">a ] b</a>';

    $djot = $this->converter->convert($html);
    $this->assertSame("[a \\] b](https://example.com)\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString($html, $roundTripped);
}

/**
 * A backslash inside link text is doubled, and the link survives a round trip.
 */
public function testLinkEscapesBackslashInLabel(): void
{
    // One literal serves as both input and round-trip expectation, so the
    // backslash is spelled the same way everywhere (value: a \ b).
    $html = '<a href="https://example.com">a \\ b</a>';

    $djot = $this->converter->convert($html);
    $this->assertSame("[a \\\\ b](https://example.com)\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString($html, $roundTripped);
}

// ==================== Images ====================

public function testImage(): void
Expand All @@ -150,6 +168,33 @@ public function testImageWithQuotedTitleEscapesDjotTitle(): void
$this->assertSame("![Alt](image.jpg \"a \\\"quote\\\" here\")\n", $result);
}

/**
 * An image whose alt text contains `[` is emitted as a raw HTML inline and round-trips unchanged.
 */
public function testImageWithBracketInAltFallsBackToRawHtml(): void
{
    $html = '<img src="img.png" alt="a [ b">';

    $djot = $this->converter->convert($html);
    $this->assertSame('`' . $html . "`{=html}\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString($html, $roundTripped);
}

/**
 * An image whose alt text contains a backslash is emitted as a raw HTML inline and round-trips unchanged.
 */
public function testImageWithBackslashInAltFallsBackToRawHtml(): void
{
    // The expectation is built from this single literal instead of a
    // double-quoted string containing the unrecognized escape "\ ".
    $html = '<img src="img.png" alt="a \\ b">';

    $djot = $this->converter->convert($html);
    $this->assertSame('`' . $html . "`{=html}\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString($html, $roundTripped);
}

/**
 * A link wrapping an image that needs the raw fallback is itself emitted as one raw HTML inline.
 */
public function testLinkWrappingProblematicImageFallsBackToRawHtml(): void
{
    $html = '<a href="https://example.com"><img src="img.png" alt="a [ b"></a>';

    $djot = $this->converter->convert($html);
    $this->assertSame('`' . $html . "`{=html}\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString($html, $roundTripped);
}

// ==================== Code ====================

public function testInlineCode(): void
Expand Down Expand Up @@ -450,6 +495,18 @@ public function testEndnotesSectionDoesNotTreatNestedListItemsAsFootnotes(): voi
$this->assertStringNotContainsString("\n1. nested", $result);
}

/**
 * A footnote with several paragraphs stays inside one footnote definition and
 * renders both paragraphs again when converted back.
 */
public function testEndnotesSectionKeepsMultilineFootnoteInsideDefinition(): void
{
    $endnotes = '<section role="doc-endnotes"><ol><li id="fn1" data-djot-footnote-label="1"><p>One</p><p>Two</p><p><a role="doc-backlink" href="#fnref1">↩︎</a></p></li></ol></section>';

    $djot = $this->converter->convert($endnotes);
    $this->assertSame("[^1]: One\n \n Two\n", $djot);

    // The definition is only rendered when something references it.
    $document = "ref[^1]\n\n" . $djot;
    $roundTripped = (new DjotConverter())->convert($document);
    $this->assertStringContainsString('<p>One</p>', $roundTripped);
    $this->assertStringContainsString('<p>Two<a href="#fnref1"', $roundTripped);
}

public function testTableWithCaption(): void
{
$html = <<<'HTML'
Expand All @@ -466,6 +523,26 @@ public function testTableWithCaption(): void
$this->assertStringContainsString('^ Monthly Sales Data', $result);
}

/**
 * Several paragraphs inside one table cell are flattened to a single line of cell text.
 */
public function testTableCellWithMultipleParagraphsFallsBackToSingleLineCellText(): void
{
    $table = '<table><tr><td><p>One</p><p>Two</p></td></tr></table>';

    $djot = $this->converter->convert($table);
    $this->assertSame("| One Two |\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString('<td>One Two</td>', $roundTripped);
}

/**
 * A list nested inside a table cell is flattened to a single line of cell text.
 */
public function testTableCellWithNestedListFallsBackToSingleLineCellText(): void
{
    $table = '<table><tr><td><ul><li>Item</li></ul></td></tr></table>';

    $djot = $this->converter->convert($table);
    $this->assertSame("| - Item |\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString('<td>- Item</td>', $roundTripped);
}

public function testTableWithMultilineCaptionKeepsAllCaptionTextInsideCaption(): void
{
$html = '<table><caption><p>cap one</p><p>cap two</p></caption><tr><td>x</td></tr></table>';
Expand Down
Loading