diff --git a/embedmd/content.go b/embedmd/content.go
index be23501..dc0c832 100644
--- a/embedmd/content.go
+++ b/embedmd/content.go
@@ -36,8 +36,7 @@ type fetcher struct{}
 
 func (fetcher) Fetch(dir, path string) ([]byte, error) {
 	if !strings.HasPrefix(path, "http://") && !strings.HasPrefix(path, "https://") {
-		path = filepath.Join(dir, filepath.FromSlash(path))
-		return os.ReadFile(path)
+		return os.ReadFile(filepath.Join(dir, filepath.FromSlash(path)))
 	}
 
 	res, err := http.Get(path)
diff --git a/embedmd/embedmd.go b/embedmd/embedmd.go
index aed9eea..52f8c84 100644
--- a/embedmd/embedmd.go
+++ b/embedmd/embedmd.go
@@ -54,7 +54,10 @@ package embedmd
 import (
 	"fmt"
 	"io"
+	"os"
+	"path/filepath"
 	"regexp"
+	"strings"
 )
 
 // Process reads markdown from the given io.Reader searching for an embedmd
@@ -88,7 +91,46 @@ type embedder struct {
 	baseDir string
 }
 
+// checkPath returns an error if path is a local path that escapes dir.
+// URLs (http/https) are not checked.
+//
+// Containment is decided lexically: both dir and the joined path are made
+// absolute and cleaned, then compared with filepath.Rel. Symbolic links are
+// not resolved, so a link inside dir that points outside it is not detected.
+//
+// When dir is empty (e.g. when reading from stdin with no base directory set),
+// no restriction is applied: there is no meaningful boundary to enforce.
+// Callers should not feed untrusted markdown to embedmd without a base
+// directory set via WithBaseDir.
+func checkPath(dir, path string) error {
+	if strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://") {
+		return nil
+	}
+	if dir == "" {
+		return nil
+	}
+	absResolved, err := filepath.Abs(filepath.Join(dir, filepath.FromSlash(path)))
+	if err != nil {
+		return fmt.Errorf("could not resolve path %q: %v", path, err)
+	}
+	absBase, err := filepath.Abs(dir)
+	if err != nil {
+		return fmt.Errorf("could not resolve base directory %q: %v", dir, err)
+	}
+	// Compare via filepath.Rel rather than a string prefix: appending a
+	// separator to a base that already ends in one (the filesystem root, or a
+	// Windows volume root) yields a prefix that no resolved path can match.
+	rel, err := filepath.Rel(absBase, absResolved)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
+		return fmt.Errorf("path %q escapes base directory", path)
+	}
+	return nil
+}
+
 func (e *embedder) runCommand(w io.Writer, cmd *command) error {
+	if err := checkPath(e.baseDir, cmd.path); err != nil {
+		return fmt.Errorf("could not read %s: %v", cmd.path, err)
+	}
 	b, err := e.Fetch(e.baseDir, cmd.path)
 	if err != nil {
 		return fmt.Errorf("could not read %s: %v", cmd.path, err)
diff --git a/embedmd/embedmd_test.go b/embedmd/embedmd_test.go
index dba20d1..eb2dbf2 100644
--- a/embedmd/embedmd_test.go
+++ b/embedmd/embedmd_test.go
@@ -242,6 +242,37 @@ func TestProcess(t *testing.T) {
 				"Yay!\n",
 			err: "2: could not read https://fakeurl.com\\main.go: parse \"https://fakeurl.com\\\\main.go\": invalid character \"\\\\\" in host name",
 		},
+		{
+			name: "path traversal rejected when base dir is set",
+			dir:  "sample",
+			in: "# Header\n" +
+				"[embedmd]:# (../secret.go)\n" +
+				"Yay!\n",
+			files: map[string][]byte{"secret.go": []byte(content)},
+			err:   `2: could not read ../secret.go: path "../secret.go" escapes base directory`,
+		},
+		{
+			name: "deeply nested path traversal rejected",
+			dir:  "a/b/c",
+			in: "# Header\n" +
+				"[embedmd]:# (../../../secret.go)\n" +
+				"Yay!\n",
+			err: `2: could not read ../../../secret.go: path "../../../secret.go" escapes base directory`,
+		},
+		{
+			name: "normal relative path within base dir still works",
+			dir:  "sample",
+			in: "# This is some markdown\n" +
+				"[embedmd]:# (code.go)\n" +
+				"Yay!\n",
+			files: map[string][]byte{"sample/code.go": []byte(content)},
+			out: "# This is some markdown\n" +
+				"[embedmd]:# (code.go)\n" +
+				"```go\n" +
+				string(content) +
+				"```\n" +
+				"Yay!\n",
+		},
 		{
 			name: "ignore commands in code blocks",
 			in: "# This is some markdown\n" +
@@ -285,8 +316,8 @@ type mixedContentProvider struct {
 
 func (c mixedContentProvider) Fetch(dir, path string) ([]byte, error) {
 	if !strings.HasPrefix(path, "http://") && !strings.HasPrefix(path, "https://") {
-		path = filepath.Join(dir, filepath.FromSlash(path))
-		if f, ok := c.files[path]; ok {
+		resolved := filepath.Join(dir, filepath.FromSlash(path))
+		if f, ok := c.files[resolved]; ok {
 			return f, nil
 		}
 		return nil, os.ErrNotExist