Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 61 additions & 5 deletions services/gitdiff/gitdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"html/template"
"io"
"net/url"
"regexp"
"sort"
"strings"
"time"
Expand All @@ -23,6 +24,7 @@ import (
pull_model "code.gitea.io/gitea/models/pull"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
Expand Down Expand Up @@ -796,6 +798,41 @@ func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
return line, err
}

// base64ImageLineRe recognises the head of a Markdown image whose target is an inline
// base64 data URI, for example ![alt](data:image/png;base64,... and captures the alt text
// in group 1. The leading "![...](" image syntax is required, so prose or code that only
// mentions a data URI does not match.
var base64ImageLineRe = regexp.MustCompile(`!\[([^\]]*)\]\(\s*data:image/[a-zA-Z0-9.+-]+;base64,`)

// isBase64ImageDiffLine reports whether s contains a Markdown image with an inline base64
// data URI. s is the payload of a diff line, i.e. the content after the +/-/space prefix.
// Such lines are produced by Forkana's article editor and can be very long.
func isBase64ImageDiffLine(s string) bool {
	return base64ImageLineRe.FindStringIndex(s) != nil
}

// base64ImagePlaceholder builds a short, human-readable replacement for an over-long base64
// image diff line, preserving the +/-/space prefix and the alt text so the rest of the file
// diff stays visible.
//
// firstFragment is the line content read so far, including the one-byte diff prefix; callers
// pass a fragment that contains the "![alt](data:image/...;base64," head. totalLen is the
// full byte length of the original line (prefix included), which may exceed
// len(firstFragment) when the line was read in several fragments.
//
// The result has the form "<prefix>[embedded base64 image: <alt>, ~<size>]", or
// "<prefix>[embedded base64 image, ~<size>]" when the alt text is empty, where <size> is the
// estimated decoded payload size formatted with base.FileSize.
func base64ImagePlaceholder(firstFragment string, totalLen int) string {
	const marker = ";base64,"

	// Keep the raw first byte as the prefix. Callers always pass a non-empty line, but an
	// empty fragment must not panic; in that case the placeholder has no prefix.
	prefix := ""
	if firstFragment != "" {
		prefix = firstFragment[:1]
	}

	// Locate the image head once: the end of the match is exactly where the base64 payload
	// starts, so the size estimate refers to the matched image even if an unrelated
	// ";base64," appears earlier on the line.
	alt := ""
	payloadStart := -1
	if loc := base64ImageLineRe.FindStringSubmatchIndex(firstFragment); loc != nil {
		alt = firstFragment[loc[2]:loc[3]]
		payloadStart = loc[1]
	} else if off := strings.Index(firstFragment, marker); off >= 0 {
		// No image syntax found; fall back to the first marker on the line.
		payloadStart = off + len(marker)
	}

	decoded := 0
	if payloadStart >= 0 {
		// The payload runs from payloadStart to the end of the line; ignore the trailing ")".
		if payloadLen := totalLen - payloadStart - 1; payloadLen > 0 {
			// Every 4 base64 characters encode 3 bytes.
			decoded = payloadLen * 3 / 4
		}
	}

	size := base.FileSize(int64(decoded))
	if alt != "" {
		return prefix + fmt.Sprintf("[embedded base64 image: %s, ~%s]", alt, size)
	}
	return prefix + fmt.Sprintf("[embedded base64 image, ~%s]", size)
}

func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) {
sb := strings.Builder{}

Expand Down Expand Up @@ -964,20 +1001,39 @@ func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharact

line := string(lineBytes)
if isFragment {
curFile.IsIncomplete = true
curFile.IsIncompleteLineTooLong = true
// The line is longer than the read buffer (readerSize == max(maxLineCharacters, 4096)),
// so this is where over-long lines actually land. If it's a base64 image line stored by
// Forkana's article editor, substitute a short placeholder so the rest of the file diff
// stays visible instead of being suppressed; otherwise mark it too-long as before.
isB64 := len(line) > 1 && isBase64ImageDiffLine(line[1:])
if !isB64 {
curFile.IsIncomplete = true
curFile.IsIncompleteLineTooLong = true
}
totalLen := len(line)
for isFragment {
lineBytes, isFragment, err = input.ReadLine()
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
return lineBytes, isFragment, fmt.Errorf("unable to ReadLine: %w", err)
}
totalLen += len(lineBytes)
}
if isB64 {
line = base64ImagePlaceholder(line, totalLen)
}
}
if len(line) > maxLineCharacters {
curFile.IsIncomplete = true
curFile.IsIncompleteLineTooLong = true
line = line[:maxLineCharacters]
// The line fit in the read buffer but still exceeds the display limit (only possible
// when MaxGitDiffLineCharacters is configured below the buffer size). Same handling:
// replace a base64 image line with a placeholder, otherwise truncate and flag it.
if len(line) > 1 && isBase64ImageDiffLine(line[1:]) {
line = base64ImagePlaceholder(line, len(line))
} else {
curFile.IsIncomplete = true
curFile.IsIncompleteLineTooLong = true
line = line[:maxLineCharacters]
}
}
curSection.Lines[len(curSection.Lines)-1].Content = line

Expand Down
58 changes: 58 additions & 0 deletions services/gitdiff/gitdiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,64 @@ index 0000000..6bb8f39
}
}

// TestParsePatch_base64Image checks how ParsePatch treats over-long lines that involve
// base64 data URIs. See issue #233 / PR #237.
func TestParsePatch_base64Image(t *testing.T) {
	const (
		maxLines          = 1000
		maxLineCharacters = 5000
	)

	// Scenario 1: a Markdown image with a long inline base64 data URI (as stored by
	// Forkana's article editor) must NOT suppress the whole file diff. The payload is
	// replaced by a short placeholder and the rest of the file renders normally. The
	// payload is far larger than the read buffer (max(maxLineCharacters, 4096)), so the
	// line is returned as a fragment, the path that previously bypassed the placeholder.
	payload := strings.Repeat("A", 8000)
	imagePatch := strings.Join([]string{
		"diff --git a/article.md b/article.md",
		"new file mode 100644",
		"index 0000000..1111111",
		"--- /dev/null",
		"+++ b/article.md",
		"@@ -0,0 +1,3 @@",
		"+# Title",
		"+![logo](data:image/png;base64," + payload + ")",
		"+Some text after",
	}, "\n") + "\n"

	parsed, err := ParsePatch(t.Context(), maxLines, maxLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(imagePatch), "")
	require.NoError(t, err)
	require.Len(t, parsed.Files, 1)
	imageFile := parsed.Files[0]
	assert.False(t, imageFile.IsIncomplete, "file diff must not be suppressed for a base64 image line")
	assert.False(t, imageFile.IsIncompleteLineTooLong, "base64 image line must not flag the file as too-long")

	// Flatten every parsed line, newline-terminated, so substring checks cannot straddle lines.
	var rendered strings.Builder
	for _, section := range imageFile.Sections {
		for _, diffLine := range section.Lines {
			rendered.WriteString(diffLine.Content)
			rendered.WriteByte('\n')
		}
	}
	got := rendered.String()
	assert.Contains(t, got, "+# Title", "content before the image should remain visible")
	assert.Contains(t, got, "+Some text after", "content after the image should remain visible")
	assert.Contains(t, got, "embedded base64 image: logo", "the image line should become a placeholder keeping the alt text")
	assert.NotContains(t, got, payload, "the raw base64 payload must not be kept in the diff")

	// Scenario 2: a long line that merely *mentions* a data URI (not the ![](…) image
	// syntax) must still be treated as a too-long line, not rewritten to a fake image
	// placeholder.
	proseLine := "+" + strings.Repeat("x", 6000) + " data:image/png;base64,xx"
	prosePatch := strings.Join([]string{
		"diff --git a/notes.md b/notes.md",
		"new file mode 100644",
		"index 0000000..2222222",
		"--- /dev/null",
		"+++ b/notes.md",
		"@@ -0,0 +1,1 @@",
		proseLine,
	}, "\n") + "\n"

	proseParsed, err := ParsePatch(t.Context(), maxLines, maxLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(prosePatch), "")
	require.NoError(t, err)
	require.Len(t, proseParsed.Files, 1)
	assert.True(t, proseParsed.Files[0].IsIncompleteLineTooLong, "non-image long line should still be flagged too-long")

	var proseRendered strings.Builder
	for _, section := range proseParsed.Files[0].Sections {
		for _, diffLine := range section.Lines {
			proseRendered.WriteString(diffLine.Content)
		}
	}
	assert.NotContains(t, proseRendered.String(), "embedded base64 image", "prose mentioning a data URI must not be rewritten as an image placeholder")
}

func setupDefaultDiff() *Diff {
return &Diff{
Files: []*DiffFile{
Expand Down
56 changes: 56 additions & 0 deletions web_src/js/features/toast-editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,66 @@ export async function createToastEditor(
}
}, true);

// Some engines (notably Safari) ignore `clipboardData` passed to the ClipboardEvent
// constructor, which would make the re-dispatch below carry no data and drop the paste.
// Detect support once; if unsupported, skip the strip and let the paste proceed normally
// (the gitdiff base64 placeholder is the backend safety net).
let canConstructClipboardData = false;
try {
canConstructClipboardData = new ClipboardEvent('paste', {clipboardData: new DataTransfer()}).clipboardData !== null;
} catch {
canConstructClipboardData = false;
}

container.addEventListener('paste', (e: ClipboardEvent) => {
if (!ensureFilesWithinLimit(e.clipboardData?.files)) {
e.preventDefault();
e.stopPropagation();
return;
}
if (!canConstructClipboardData) return;

// When the clipboard contains HTML (e.g. content copied from YouTube or other media
// sites), it may include external thumbnail <img> elements. If left in, Toast UI
// converts them to base64 blobs via addImageBlobHook, creating very long lines in the
// saved markdown file and triggering the git diff suppression (issue #233).
// Rewrite the clipboard as HTML with external images removed, keeping only data: URIs
// (locally pasted/dropped images that the user deliberately embedded).
const html = e.clipboardData?.getData('text/html');
if (html && e.clipboardData?.types.includes('text/html')) {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
let strippedAny = false;
for (const img of doc.querySelectorAll('img')) {
const src = img.getAttribute('src') || '';
if (!src.startsWith('data:')) {
// Replace external image with its alt text as a plain text node, or remove entirely
const alt = img.getAttribute('alt');
if (alt) {
img.replaceWith(doc.createTextNode(alt));
} else {
img.remove();
}
strippedAny = true;
}
}
if (strippedAny) {
e.preventDefault();
e.stopPropagation();
const cleanHtml = doc.body.innerHTML;
const text = e.clipboardData.getData('text/plain');
const dt = new DataTransfer();
dt.setData('text/html', cleanHtml);
dt.setData('text/plain', text);
// Re-dispatch on the element the editor actually listens on (the pseudo-clipboard
// textarea in markdown mode, or the ProseMirror contenteditable in WYSIWYG) — NOT
// `container`, which is an ancestor the editor's paste handler never receives. The
// synthetic event re-enters this capture listener, but with no external <img> left
// it falls through and reaches the editor. Rebuilding the DataTransfer without the
// image item is what keeps the pasted text while dropping the incidental thumbnail.
const target = (e.target as HTMLElement) ?? container;
target.dispatchEvent(new ClipboardEvent('paste', {bubbles: true, cancelable: true, clipboardData: dt}));
}
}
}, true);

Expand Down
Loading