diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a190ea429d1f6e042da752e21b7452b1665fd87f..78444f7a80ccb87cbeb10ca316da571b06a87f18 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,6 +14,7 @@ verify: GITALY_ADDRESS: "tcp://gitaly:8075" script: - go version + - apt-get update && apt-get -y install libimage-exiftool-perl - make test test using go 1.10: diff --git a/CHANGELOG b/CHANGELOG index f15c20053a50841258f5c6a38997fd37efe6a988..067c9e767c4d762ec692196041373f9caba12447 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,14 @@ Formerly known as 'gitlab-git-http-server'. +v8.3.3 + +- Preserve orientation when removing EXIF + +v8.3.2 + +- Remove EXIF from JPEG/TIFF images + v 8.3.1 - Update gitaly-proto to 1.10.0 !363 @@ -27,6 +35,14 @@ v 8.1.0 - Update gitaly-proto to 0.124.0 !331 - Add distributed tracing with LabKit !325 +v 8.0.4 + +- Preserve orientation when removing EXIF + +v 8.0.3 + +- Remove EXIF from JPEG/TIFF images + v 8.0.2 - Fixed svg recognition to get the proper content type !353 diff --git a/README.md b/README.md index 5dfce77d5df9e37d2506389a85b705027b8d7037..7e94ae6ec79339f745a98d18beba70cff6b00c61 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,25 @@ make install PREFIX=/foo On some operating systems, such as FreeBSD, you may have to use `gmake` instead of `make`. +## Dependencies + +### Exiftool + +Workhorse uses [exiftool](https://www.sno.phy.queensu.ca/~phil/exiftool/) for +removing EXIF data (which may contain sensitive information) from uploaded +images. If you installed GitLab: + +- Using the Omnibus package, you're all set. +- From source, make sure `exiftool` is installed: + + ```sh + # Debian/Ubuntu + sudo apt-get install libimage-exiftool-perl + + # RHEL/CentOS + sudo yum install perl-Image-ExifTool + ``` + ## Error tracking GitLab-Workhorse supports remote error tracking with diff --git a/VERSION b/VERSION index 2bf50aaf17a6f89855dce48afd4e6cbb554047d6..d127a0ff9f199f05b8c074da7c00698f7324117e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -8.3.0 +8.3.3 diff --git a/internal/helper/helpers.go b/internal/helper/helpers.go index 5d2cb280871d5a29827e83e9a4b6ecbcb798d176..2acc357b278f549dec437069e10ba9e126d69d99 100644 --- a/internal/helper/helpers.go +++ b/internal/helper/helpers.go @@ -37,7 +37,11 @@ func LogError(r *http.Request, err error) { } func RequestEntityTooLarge(w http.ResponseWriter, r *http.Request, err error) { - http.Error(w, "Request Entity Too Large", http.StatusRequestEntityTooLarge) + CaptureAndFail(w, r, err, "Request Entity Too Large", http.StatusRequestEntityTooLarge) +} + +func CaptureAndFail(w http.ResponseWriter, r *http.Request, err error, msg string, code int) { + http.Error(w, msg, code) captureRavenError(r, err) printError(r, err) } diff --git a/internal/upload/exif/exif.go b/internal/upload/exif/exif.go new file mode 100644 index 0000000000000000000000000000000000000000..1fbd3e921112627913eac631b2da7b0ac7829c9a --- /dev/null +++ b/internal/upload/exif/exif.go @@ -0,0 +1,105 @@ +package exif + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "os/exec" + "regexp" + + "gitlab.com/gitlab-org/gitlab-workhorse/internal/log" +) + +var ErrRemovingExif = errors.New("error while removing EXIF") + +type cleaner struct { + ctx context.Context + cmd *exec.Cmd + stdout io.Reader + stderr bytes.Buffer + waitDone chan struct{} + waitErr error +} + +func NewCleaner(ctx context.Context, stdin io.Reader) (io.Reader, error) { + c := &cleaner{ + ctx: ctx, + waitDone: make(chan struct{}), + } + + if err := c.startProcessing(stdin); err != nil { + return nil, err + } + + return c, nil +} + +func (c *cleaner) Read(p []byte) (int, error) { + n, err := c.stdout.Read(p) + if err == io.EOF { + if waitErr := c.wait(); waitErr != nil { + log.WithFields(c.ctx, log.Fields{ + "command": c.cmd.Args, + "stderr": c.stderr.String(), + "error": waitErr.Error(), + }).Print("exiftool command failed") + return n, ErrRemovingExif + } + } + + return n, err +} + +func (c *cleaner) startProcessing(stdin io.Reader) error { + var err error + + whitelisted_tags := []string{ + "-ResolutionUnit", + "-XResolution", + "-YResolution", + "-YCbCrSubSampling", + "-YCbCrPositioning", + "-BitsPerSample", + "-ImageHeight", + "-ImageWidth", + "-ImageSize", + "-Copyright", + "-CopyrightNotice", + "-Orientation", + } + + args := append([]string{"-all=", "--IPTC:all", "--XMP-iptcExt:all", "-tagsFromFile", "@"}, whitelisted_tags...) + args = append(args, "-") + c.cmd = exec.CommandContext(c.ctx, "exiftool", args...) + + c.cmd.Stderr = &c.stderr + c.cmd.Stdin = stdin + + c.stdout, err = c.cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("failed to create stdout pipe: %v", err) + } + + if err = c.cmd.Start(); err != nil { + return fmt.Errorf("start %v: %v", c.cmd.Args, err) + } + go func() { + c.waitErr = c.cmd.Wait() + close(c.waitDone) + }() + + return nil +} + +func (c *cleaner) wait() error { + <-c.waitDone + return c.waitErr +} + +func IsExifFile(filename string) bool { + filenameMatch := regexp.MustCompile(`(?i)\.(jpg|jpeg|tiff)$`) + + return filenameMatch.MatchString(filename) +} diff --git a/internal/upload/exif/exif_test.go b/internal/upload/exif/exif_test.go new file mode 100644 index 0000000000000000000000000000000000000000..83a3d7edb0933e70e50c8e7a3e007d61dcb8fc06 --- /dev/null +++ b/internal/upload/exif/exif_test.go @@ -0,0 +1,77 @@ +package exif + +import ( + "context" + "io" + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsExifFile(t *testing.T) { + tests := []struct { + name string + expected bool + }{ + { + name: "/full/path.jpg", + expected: true, + }, + { + name: "path.jpeg", + expected: true, + }, + { + name: "path.tiff", + expected: true, + }, + { + name: "path.JPG", + expected: true, + }, + { + name: "path.tar", + expected: false, + }, + { + name: "path", + expected: false, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require.Equal(t, test.expected, IsExifFile(test.name)) + }) + } +} + +func TestNewCleanerWithValidFile(t *testing.T) { + input, err := os.Open("testdata/sample_exif.jpg") + require.NoError(t, err) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + cleaner, err := NewCleaner(ctx, input) + require.NoError(t, err, "Expected no error when creating cleaner command") + + size, err := io.Copy(ioutil.Discard, cleaner) + require.NoError(t, err, "Expected no error when reading output") + + sizeAfterStrip := int64(25399) + require.Equal(t, sizeAfterStrip, size, "Different size of converted image") +} + +func TestNewCleanerWithInvalidFile(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + cleaner, err := NewCleaner(ctx, strings.NewReader("invalid image")) + require.NoError(t, err, "Expected no error when creating cleaner command") + + size, err := io.Copy(ioutil.Discard, cleaner) + require.Error(t, err, "Expected error when reading output") + require.Equal(t, int64(0), size, "Size of invalid image should be 0") +} diff --git a/internal/upload/exif/testdata/sample_exif.jpg b/internal/upload/exif/testdata/sample_exif.jpg new file mode 100644 index 0000000000000000000000000000000000000000..05eda3f7f953eec3f7f9c08428328d9c3922d7d9 Binary files /dev/null and b/internal/upload/exif/testdata/sample_exif.jpg differ diff --git a/internal/upload/rewrite.go b/internal/upload/rewrite.go index 2086c8e95e7ef130c53064765483d7476b0fcbf1..d7c4d777eab3a1636cc4df77119ef0f61c98f6ba 100644 --- a/internal/upload/rewrite.go +++ b/internal/upload/rewrite.go @@ -12,6 +12,8 @@ import ( "gitlab.com/gitlab-org/gitlab-workhorse/internal/api" "gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore" + "gitlab.com/gitlab-org/gitlab-workhorse/internal/log" + "gitlab.com/gitlab-org/gitlab-workhorse/internal/upload/exif" ) var ( @@ -113,12 +115,30 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa opts := filestore.GetOpts(rew.preauth) opts.TempFilePrefix = filename - fh, err := filestore.SaveFileFromReader(ctx, p, -1, opts) + var inputReader io.Reader + if exif.IsExifFile(filename) { + log.WithFields(ctx, log.Fields{ + "filename": filename, + }).Print("running exiftool to remove any metadata") + + cleaner, err := exif.NewCleaner(ctx, p) + if err != nil { + return fmt.Errorf("failed to start EXIF metadata cleaner: %v", err) + } + + inputReader = cleaner + } else { + inputReader = p + } + + fh, err := filestore.SaveFileFromReader(ctx, inputReader, -1, opts) if err != nil { - if err == filestore.ErrEntityTooLarge { + switch err { + case filestore.ErrEntityTooLarge, exif.ErrRemovingExif: return err + default: + return fmt.Errorf("persisting multipart file: %v", err) } - return fmt.Errorf("persisting multipart file: %v", err) } for key, value := range fh.GitLabFinalizeFields(name) { diff --git a/internal/upload/uploads.go b/internal/upload/uploads.go index 766d5552096b4c333c64c4805867fe56f49b3662..32bf514ee1fb2a93eb6f31ad71b4b37f9535c5a5 100644 --- a/internal/upload/uploads.go +++ b/internal/upload/uploads.go @@ -11,6 +11,7 @@ import ( "gitlab.com/gitlab-org/gitlab-workhorse/internal/api" "gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore" "gitlab.com/gitlab-org/gitlab-workhorse/internal/helper" + "gitlab.com/gitlab-org/gitlab-workhorse/internal/upload/exif" ) // These methods are allowed to have thread-unsafe implementations. @@ -40,6 +41,8 @@ func HandleFileUploads(w http.ResponseWriter, r *http.Request, h http.Handler, p h.ServeHTTP(w, r) case filestore.ErrEntityTooLarge: helper.RequestEntityTooLarge(w, r, err) + case exif.ErrRemovingExif: + helper.CaptureAndFail(w, r, err, "Failed to process image", http.StatusUnprocessableEntity) default: helper.Fail500(w, r, fmt.Errorf("handleFileUploads: extract files from multipart: %v", err)) } diff --git a/internal/upload/uploads_test.go b/internal/upload/uploads_test.go index 45f0f6e0a501c6198c03b4c1ccb7135bf9fd28fa..e974569fa1b13ea0458cb95ced998059fe7f6b48 100644 --- a/internal/upload/uploads_test.go +++ b/internal/upload/uploads_test.go @@ -12,10 +12,13 @@ import ( "net/http/httptest" "os" "regexp" + "strconv" "strings" "testing" "time" + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitlab-workhorse/internal/api" "gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore" "gitlab.com/gitlab-org/gitlab-workhorse/internal/helper" @@ -323,6 +326,93 @@ func TestInvalidFileNames(t *testing.T) { } } +func TestUploadHandlerRemovingExif(t *testing.T) { + tempPath, err := ioutil.TempDir("", "uploads") + require.NoError(t, err) + defer os.RemoveAll(tempPath) + + var buffer bytes.Buffer + + content, err := ioutil.ReadFile("exif/testdata/sample_exif.jpg") + require.NoError(t, err) + + writer := multipart.NewWriter(&buffer) + file, err := writer.CreateFormFile("file", "test.jpg") + require.NoError(t, err) + + _, err = file.Write(content) + require.NoError(t, err) + + err = writer.Close() + require.NoError(t, err) + + ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) { + err := r.ParseMultipartForm(100000) + require.NoError(t, err) + + size, err := strconv.Atoi(r.FormValue("file.size")) + require.NoError(t, err) + require.True(t, size < len(content), "Expected the file to be smaller after removal of exif") + require.True(t, size > 0, "Expected to receive not empty file") + + w.WriteHeader(200) + fmt.Fprint(w, "RESPONSE") + }) + defer ts.Close() + + httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", &buffer) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + httpRequest = httpRequest.WithContext(ctx) + httpRequest.ContentLength = int64(buffer.Len()) + httpRequest.Header.Set("Content-Type", writer.FormDataContentType()) + response := httptest.NewRecorder() + + handler := newProxy(ts.URL) + HandleFileUploads(response, httpRequest, handler, &api.Response{TempPath: tempPath}, &testFormProcessor{}) + testhelper.AssertResponseCode(t, response, 200) +} + +func TestUploadHandlerRemovingInvalidExif(t *testing.T) { + tempPath, err := ioutil.TempDir("", "uploads") + require.NoError(t, err) + defer os.RemoveAll(tempPath) + + var buffer bytes.Buffer + + writer := multipart.NewWriter(&buffer) + file, err := writer.CreateFormFile("file", "test.jpg") + require.NoError(t, err) + + fmt.Fprint(file, "this is not valid image data") + err = writer.Close() + require.NoError(t, err) + + ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) { + err := r.ParseMultipartForm(100000) + require.Error(t, err) + }) + defer ts.Close() + + httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", &buffer) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + httpRequest = httpRequest.WithContext(ctx) + httpRequest.ContentLength = int64(buffer.Len()) + httpRequest.Header.Set("Content-Type", writer.FormDataContentType()) + response := httptest.NewRecorder() + + handler := newProxy(ts.URL) + HandleFileUploads(response, httpRequest, handler, &api.Response{TempPath: tempPath}, &testFormProcessor{}) + testhelper.AssertResponseCode(t, response, 422) +} + func newProxy(url string) *proxy.Proxy { parsedURL := helper.URLMustParse(url) return proxy.NewProxy(parsedURL, "123", roundtripper.NewTestBackendRoundTripper(parsedURL))