diff --git a/changelogs/unreleased/content-type-check.yml b/changelogs/unreleased/content-type-check.yml new file mode 100644 index 0000000000000000000000000000000000000000..cac49ad2ab08ef30e21c127f21f86ee5b5515dec --- /dev/null +++ b/changelogs/unreleased/content-type-check.yml @@ -0,0 +1,5 @@ +--- +title: Check image content type before running exiftool in workhorse +merge_request: +author: +type: security diff --git a/workhorse/go.mod b/workhorse/go.mod index b6192af6b1b42da5e8de4590b5bc29c5adfdec33..e565feef37da6e5b0b949eda2d232e310f8b1e33 100644 --- a/workhorse/go.mod +++ b/workhorse/go.mod @@ -31,6 +31,7 @@ require ( gitlab.com/gitlab-org/gitaly v1.74.0 gitlab.com/gitlab-org/labkit v1.0.0 gocloud.dev v0.21.1-0.20201223184910-5094f54ed8bb + golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 golang.org/x/lint v0.0.0-20200302205851-738671d3881b golang.org/x/net v0.0.0-20201224014010-6772e930b67b golang.org/x/text v0.3.5 // indirect diff --git a/workhorse/internal/upload/exif/exif.go b/workhorse/internal/upload/exif/exif.go index af489e83b0b5e5ab23826a892237d9c6f7c8406c..db3c45431c08654a9c4a9d530eccc2056ae0e50a 100644 --- a/workhorse/internal/upload/exif/exif.go +++ b/workhorse/internal/upload/exif/exif.go @@ -23,6 +23,14 @@ type cleaner struct { eof bool } +type FileType int + +const ( + TypeUnknown FileType = iota + TypeJPEG + TypeTIFF +) + func NewCleaner(ctx context.Context, stdin io.Reader) (io.ReadCloser, error) { c := &cleaner{ctx: ctx} @@ -101,11 +109,20 @@ func (c *cleaner) startProcessing(stdin io.Reader) error { return nil } -func IsExifFile(filename string) bool { +func FileTypeFromSuffix(filename string) FileType { if os.Getenv("SKIP_EXIFTOOL") == "1" { - return false + return TypeUnknown + } + + jpegMatch := regexp.MustCompile(`(?i)^[^\n]*\.(jpg|jpeg)$`) + if jpegMatch.MatchString(filename) { + return TypeJPEG + } + + tiffMatch := regexp.MustCompile(`(?i)^[^\n]*\.tiff$`) + if tiffMatch.MatchString(filename) { + return TypeTIFF } - filenameMatch := regexp.MustCompile(`(?i)\.(jpg|jpeg|tiff)$`) - return filenameMatch.MatchString(filename) + return TypeUnknown } diff --git a/workhorse/internal/upload/exif/exif_test.go b/workhorse/internal/upload/exif/exif_test.go index 373d97f7fcea477f88e17e47dc700565fda253ec..ee5883d9e08385f8f8e730570b557f4277ae3cb6 100644 --- a/workhorse/internal/upload/exif/exif_test.go +++ b/workhorse/internal/upload/exif/exif_test.go @@ -11,39 +11,57 @@ import ( "github.com/stretchr/testify/require" ) -func TestIsExifFile(t *testing.T) { +func TestFileTypeFromSuffix(t *testing.T) { tests := []struct { name string - expected bool + expected FileType }{ { name: "/full/path.jpg", - expected: true, + expected: TypeJPEG, }, { name: "path.jpeg", - expected: true, + expected: TypeJPEG, }, { name: "path.tiff", - expected: true, + expected: TypeTIFF, }, { name: "path.JPG", - expected: true, + expected: TypeJPEG, }, { name: "path.tar", - expected: false, + expected: TypeUnknown, }, { name: "path", - expected: false, + expected: TypeUnknown, + }, + { + name: "something.jpg.py", + expected: TypeUnknown, + }, + { + name: "something.py.jpg", + expected: TypeJPEG, + }, + { + name: `something.jpg + .py`, + expected: TypeUnknown, + }, + { + name: `something.something + .jpg`, + expected: TypeUnknown, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - require.Equal(t, test.expected, IsExifFile(test.name)) + require.Equal(t, test.expected, FileTypeFromSuffix(test.name)) }) } } diff --git a/workhorse/internal/upload/exif/testdata/sample_exif.tiff b/workhorse/internal/upload/exif/testdata/sample_exif.tiff new file mode 100644 index 0000000000000000000000000000000000000000..6671d818edba223f20dcd707be3176d4ca042412 Binary files /dev/null and b/workhorse/internal/upload/exif/testdata/sample_exif.tiff differ diff --git a/workhorse/internal/upload/exif/testdata/sample_exif_corrupted.jpg b/workhorse/internal/upload/exif/testdata/sample_exif_corrupted.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3b5c692de5462365fe3cbb76e419b30b496faefc Binary files /dev/null and b/workhorse/internal/upload/exif/testdata/sample_exif_corrupted.jpg differ diff --git a/workhorse/internal/upload/exif/testdata/sample_exif_invalid.jpg b/workhorse/internal/upload/exif/testdata/sample_exif_invalid.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9f8a284c64fc06d43daf5a22640f097799e87a3d --- /dev/null +++ b/workhorse/internal/upload/exif/testdata/sample_exif_invalid.jpg @@ -0,0 +1 @@ +invalid data diff --git a/workhorse/internal/upload/rewrite.go b/workhorse/internal/upload/rewrite.go index b8c9194f180f5d03bb7723eec3f375a6ed9f3678..29c3e54f4f2f8bb7439e87d4871ec2de81196bd1 100644 --- a/workhorse/internal/upload/rewrite.go +++ b/workhorse/internal/upload/rewrite.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "mime/multipart" "net/http" + "os" "path/filepath" "strings" @@ -15,6 +16,8 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "gitlab.com/gitlab-org/labkit/log" + "golang.org/x/image/tiff" + "gitlab.com/gitlab-org/gitlab-workhorse/internal/api" "gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore" "gitlab.com/gitlab-org/gitlab-workhorse/internal/lsif_transformer/parser" @@ -127,9 +130,11 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa var inputReader io.ReadCloser var err error + + imageType := exif.FileTypeFromSuffix(filename) switch { - case exif.IsExifFile(filename): - inputReader, err = handleExifUpload(ctx, p, filename) + case imageType != exif.TypeUnknown: + inputReader, err = handleExifUpload(ctx, p, filename, imageType) if err != nil { return err } @@ -169,12 +174,48 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa return rew.filter.ProcessFile(ctx, name, fh, rew.writer) } -func handleExifUpload(ctx context.Context, r io.Reader, filename string) (io.ReadCloser, error) { +func handleExifUpload(ctx context.Context, r io.Reader, filename string, imageType exif.FileType) (io.ReadCloser, error) { + tmpfile, err := ioutil.TempFile("", "exifremove") + if err != nil { + return nil, err + } + go func() { + <-ctx.Done() + tmpfile.Close() + }() + if err := os.Remove(tmpfile.Name()); err != nil { + return nil, err + } + + _, err = io.Copy(tmpfile, r) + if err != nil { + return nil, err + } + + tmpfile.Seek(0, io.SeekStart) + isValidType := false + switch imageType { + case exif.TypeJPEG: + isValidType = isJPEG(tmpfile) + case exif.TypeTIFF: + isValidType = isTIFF(tmpfile) + } + + tmpfile.Seek(0, io.SeekStart) + if !isValidType { + log.WithContextFields(ctx, log.Fields{ + "filename": filename, + "imageType": imageType, + }).Print("invalid content type, not running exiftool") + + return tmpfile, nil + } + log.WithContextFields(ctx, log.Fields{ "filename": filename, }).Print("running exiftool to remove any metadata") - cleaner, err := exif.NewCleaner(ctx, r) + cleaner, err := exif.NewCleaner(ctx, tmpfile) if err != nil { return nil, err } @@ -182,6 +223,29 @@ func handleExifUpload(ctx context.Context, r io.Reader, filename string) (io.Rea return cleaner, nil } +func isTIFF(r io.Reader) bool { + _, err := tiff.Decode(r) + if err == nil { + return true + } + + if _, unsupported := err.(tiff.UnsupportedError); unsupported { + return true + } + + return false +} + +func isJPEG(r io.Reader) bool { + // Only the first 512 bytes are used to sniff the content type. + buf, err := ioutil.ReadAll(io.LimitReader(r, 512)) + if err != nil { + return false + } + + return http.DetectContentType(buf) == "image/jpeg" +} + func handleLsifUpload(ctx context.Context, reader io.Reader, tempPath, filename string, preauth *api.Response) (io.ReadCloser, error) { parserConfig := parser.Config{ TempPath: tempPath, diff --git a/workhorse/internal/upload/rewrite_test.go b/workhorse/internal/upload/rewrite_test.go new file mode 100644 index 0000000000000000000000000000000000000000..6fc41c3fefde42bc8d78a26a916eca2d5b1f40e0 --- /dev/null +++ b/workhorse/internal/upload/rewrite_test.go @@ -0,0 +1,43 @@ +package upload + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestImageTypeRecongition(t *testing.T) { + tests := []struct { + filename string + isJPEG bool + isTIFF bool + }{ + { + filename: "exif/testdata/sample_exif.jpg", + isJPEG: true, + isTIFF: false, + }, { + filename: "exif/testdata/sample_exif.tiff", + isJPEG: false, + isTIFF: true, + }, { + filename: "exif/testdata/sample_exif_corrupted.jpg", + isJPEG: true, + isTIFF: false, + }, { + filename: "exif/testdata/sample_exif_invalid.jpg", + isJPEG: false, + isTIFF: false, + }, + } + + for _, test := range tests { + t.Run(test.filename, func(t *testing.T) { + input, err := os.Open(test.filename) + require.NoError(t, err) + require.Equal(t, test.isJPEG, isJPEG(input)) + require.Equal(t, test.isTIFF, isTIFF(input)) + }) + } +} diff --git a/workhorse/internal/upload/uploads_test.go b/workhorse/internal/upload/uploads_test.go index 29282a6f88d381f10f2520274805ccfd0197bd3b..d77d73b5f48ba14207901dad467e63552cd7e82c 100644 --- a/workhorse/internal/upload/uploads_test.go +++ b/workhorse/internal/upload/uploads_test.go @@ -366,26 +366,28 @@ func TestInvalidFileNames(t *testing.T) { } func TestUploadHandlerRemovingExif(t *testing.T) { - tempPath, err := ioutil.TempDir("", "uploads") - require.NoError(t, err) - defer os.RemoveAll(tempPath) - - var buffer bytes.Buffer - content, err := ioutil.ReadFile("exif/testdata/sample_exif.jpg") require.NoError(t, err) - writer := multipart.NewWriter(&buffer) - file, err := writer.CreateFormFile("file", "test.jpg") - require.NoError(t, err) + runUploadTest(t, content, "sample_exif.jpg", 200, func(w http.ResponseWriter, r *http.Request) { + err := r.ParseMultipartForm(100000) + require.NoError(t, err) - _, err = file.Write(content) - require.NoError(t, err) + size, err := strconv.Atoi(r.FormValue("file.size")) + require.NoError(t, err) + require.True(t, size < len(content), "Expected the file to be smaller after removal of exif") + require.True(t, size > 0, "Expected to receive not empty file") - err = writer.Close() + w.WriteHeader(200) + fmt.Fprint(w, "RESPONSE") + }) +} + +func TestUploadHandlerRemovingExifTiff(t *testing.T) { + content, err := ioutil.ReadFile("exif/testdata/sample_exif.tiff") require.NoError(t, err) - ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) { + runUploadTest(t, content, "sample_exif.tiff", 200, func(w http.ResponseWriter, r *http.Request) { err := r.ParseMultipartForm(100000) require.NoError(t, err) @@ -397,30 +399,36 @@ func TestUploadHandlerRemovingExif(t *testing.T) { w.WriteHeader(200) fmt.Fprint(w, "RESPONSE") }) - defer ts.Close() +} - httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", &buffer) +func TestUploadHandlerRemovingExifInvalidContentType(t *testing.T) { + content, err := ioutil.ReadFile("exif/testdata/sample_exif_invalid.jpg") require.NoError(t, err) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + runUploadTest(t, content, "sample_exif_invalid.jpg", 200, func(w http.ResponseWriter, r *http.Request) { + err := r.ParseMultipartForm(100000) + require.NoError(t, err) - httpRequest = httpRequest.WithContext(ctx) - httpRequest.ContentLength = int64(buffer.Len()) - httpRequest.Header.Set("Content-Type", writer.FormDataContentType()) - response := httptest.NewRecorder() + output, err := ioutil.ReadFile(r.FormValue("file.path")) + require.NoError(t, err) + require.Equal(t, content, output, "Expected the file to be same as before") - handler := newProxy(ts.URL) - apiResponse := &api.Response{TempPath: tempPath} - preparer := &DefaultPreparer{} - opts, _, err := preparer.Prepare(apiResponse) + w.WriteHeader(200) + fmt.Fprint(w, "RESPONSE") + }) +} + +func TestUploadHandlerRemovingExifCorruptedFile(t *testing.T) { + content, err := ioutil.ReadFile("exif/testdata/sample_exif_corrupted.jpg") require.NoError(t, err) - HandleFileUploads(response, httpRequest, handler, apiResponse, &testFormProcessor{}, opts) - require.Equal(t, 200, response.Code) + runUploadTest(t, content, "sample_exif_corrupted.jpg", 422, func(w http.ResponseWriter, r *http.Request) { + err := r.ParseMultipartForm(100000) + require.Error(t, err) + }) } -func TestUploadHandlerRemovingInvalidExif(t *testing.T) { +func runUploadTest(t *testing.T, image []byte, filename string, httpCode int, tsHandler func(http.ResponseWriter, *http.Request)) { tempPath, err := ioutil.TempDir("", "uploads") require.NoError(t, err) defer os.RemoveAll(tempPath) @@ -428,17 +436,16 @@ func TestUploadHandlerRemovingInvalidExif(t *testing.T) { var buffer bytes.Buffer writer := multipart.NewWriter(&buffer) - file, err := writer.CreateFormFile("file", "test.jpg") + file, err := writer.CreateFormFile("file", filename) + require.NoError(t, err) + + _, err = file.Write(image) require.NoError(t, err) - fmt.Fprint(file, "this is not valid image data") err = writer.Close() require.NoError(t, err) - ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) { - err := r.ParseMultipartForm(100000) - require.Error(t, err) - }) + ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), tsHandler) defer ts.Close() httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", &buffer) @@ -459,7 +466,7 @@ func TestUploadHandlerRemovingInvalidExif(t *testing.T) { require.NoError(t, err) HandleFileUploads(response, httpRequest, handler, apiResponse, &testFormProcessor{}, opts) - require.Equal(t, 422, response.Code) + require.Equal(t, httpCode, response.Code) } func newProxy(url string) *proxy.Proxy {