diff --git a/Makefile b/Makefile index d14632f6b37cbc9f3b87423c4230e325cfccd16f..2d40398000f89afba41717b6dcdb1f2cfd1b3faa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ PREFIX=/usr/local VERSION=$(shell git describe)-$(shell date -u +%Y%m%d.%H%M%S) -gitlab-workhorse: main.go githandler.go +gitlab-workhorse: main.go githandler.go archive.go git-http.go helpers.go go build -ldflags "-X main.Version ${VERSION}" -o gitlab-workhorse install: gitlab-workhorse diff --git a/archive.go b/archive.go new file mode 100644 index 0000000000000000000000000000000000000000..5492ceb995baa9166f7d29fde79304d20cf4fba7 --- /dev/null +++ b/archive.go @@ -0,0 +1,145 @@ +/* +In this file we handle 'git archive' downloads +*/ + +package main + +import ( + "fmt" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "os/exec" + "path" + "time" +) + +func handleGetArchive(w http.ResponseWriter, r *gitRequest, format string) { + archiveFilename := path.Base(r.ArchivePath) + + if cachedArchive, err := os.Open(r.ArchivePath); err == nil { + defer cachedArchive.Close() + log.Printf("Serving cached file %q", r.ArchivePath) + setArchiveHeaders(w, format, archiveFilename) + // Even if somebody deleted the cachedArchive from disk since we opened + // the file, Unix file semantics guarantee we can still read from the + // open file in this process. + http.ServeContent(w, r.Request, "", time.Unix(0, 0), cachedArchive) + return + } + + // We assume the tempFile has a unique name so that concurrent requests are + // safe. We create the tempfile in the same directory as the final cached + // archive we want to create so that we can use an atomic link(2) operation + // to finalize the cached archive. + tempFile, err := prepareArchiveTempfile(path.Dir(r.ArchivePath), archiveFilename) + if err != nil { + fail500(w, "handleGetArchive create tempfile for archive", err) + } + defer tempFile.Close() + defer os.Remove(tempFile.Name()) + + compressCmd, archiveFormat := parseArchiveFormat(format) + + archiveCmd := gitCommand("", "git", "--git-dir="+r.RepoPath, "archive", "--format="+archiveFormat, "--prefix="+r.ArchivePrefix+"/", r.CommitId) + archiveStdout, err := archiveCmd.StdoutPipe() + if err != nil { + fail500(w, "handleGetArchive", err) + return + } + defer archiveStdout.Close() + if err := archiveCmd.Start(); err != nil { + fail500(w, "handleGetArchive", err) + return + } + defer cleanUpProcessGroup(archiveCmd) // Ensure brute force subprocess clean-up + + var stdout io.ReadCloser + if compressCmd == nil { + stdout = archiveStdout + } else { + compressCmd.Stdin = archiveStdout + + stdout, err = compressCmd.StdoutPipe() + if err != nil { + fail500(w, "handleGetArchive compressCmd stdout pipe", err) + return + } + defer stdout.Close() + + if err := compressCmd.Start(); err != nil { + fail500(w, "handleGetArchive start compressCmd process", err) + return + } + defer compressCmd.Wait() + + archiveStdout.Close() + } + // Every Read() from stdout will be synchronously written to tempFile + // before it comes out the TeeReader. + archiveReader := io.TeeReader(stdout, tempFile) + + // Start writing the response + setArchiveHeaders(w, format, archiveFilename) + w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return + if _, err := io.Copy(w, archiveReader); err != nil { + logContext("handleGetArchive read from subprocess", err) + return + } + if err := archiveCmd.Wait(); err != nil { + logContext("handleGetArchive wait for archiveCmd", err) + return + } + if compressCmd != nil { + if err := compressCmd.Wait(); err != nil { + logContext("handleGetArchive wait for compressCmd", err) + return + } + } + + if err := finalizeCachedArchive(tempFile, r.ArchivePath); err != nil { + logContext("handleGetArchive finalize cached archive", err) + return + } +} + +func setArchiveHeaders(w http.ResponseWriter, format string, archiveFilename string) { + w.Header().Add("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, archiveFilename)) + if format == "zip" { + w.Header().Add("Content-Type", "application/zip") + } else { + w.Header().Add("Content-Type", "application/octet-stream") + } + w.Header().Add("Content-Transfer-Encoding", "binary") + w.Header().Add("Cache-Control", "private") +} + +func parseArchiveFormat(format string) (*exec.Cmd, string) { + switch format { + case "tar": + return nil, "tar" + case "tar.gz": + return exec.Command("gzip", "-c", "-n"), "tar" + case "tar.bz2": + return exec.Command("bzip2", "-c"), "tar" + case "zip": + return nil, "zip" + } + return nil, "unknown" +} + +func prepareArchiveTempfile(dir string, prefix string) (*os.File, error) { + if err := os.MkdirAll(dir, 0700); err != nil { + return nil, err + } + return ioutil.TempFile(dir, prefix) +} + +func finalizeCachedArchive(tempFile *os.File, archivePath string) error { + if err := tempFile.Close(); err != nil { + return err + } + return os.Link(tempFile.Name(), archivePath) +} diff --git a/git-http.go b/git-http.go new file mode 100644 index 0000000000000000000000000000000000000000..f3742b2eda86ea3c894f06c9528d097303781075 --- /dev/null +++ b/git-http.go @@ -0,0 +1,138 @@ +/* +In this file we handle the Git 'smart HTTP' protocol +*/ + +package main + +import ( + "compress/gzip" + "fmt" + "io" + "net/http" + "strings" +) + +func handleGetInfoRefs(w http.ResponseWriter, r *gitRequest, _ string) { + rpc := r.URL.Query().Get("service") + if !(rpc == "git-upload-pack" || rpc == "git-receive-pack") { + // The 'dumb' Git HTTP protocol is not supported + http.Error(w, "Not Found", 404) + return + } + + // Prepare our Git subprocess + cmd := gitCommand(r.GL_ID, "git", subCommand(rpc), "--stateless-rpc", "--advertise-refs", r.RepoPath) + stdout, err := cmd.StdoutPipe() + if err != nil { + fail500(w, "handleGetInfoRefs", err) + return + } + defer stdout.Close() + if err := cmd.Start(); err != nil { + fail500(w, "handleGetInfoRefs", err) + return + } + defer cleanUpProcessGroup(cmd) // Ensure brute force subprocess clean-up + + // Start writing the response + w.Header().Add("Content-Type", fmt.Sprintf("application/x-%s-advertisement", rpc)) + w.Header().Add("Cache-Control", "no-cache") + w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return + if err := pktLine(w, fmt.Sprintf("# service=%s\n", rpc)); err != nil { + logContext("handleGetInfoRefs response", err) + return + } + if err := pktFlush(w); err != nil { + logContext("handleGetInfoRefs response", err) + return + } + if _, err := io.Copy(w, stdout); err != nil { + logContext("handleGetInfoRefs read from subprocess", err) + return + } + if err := cmd.Wait(); err != nil { + logContext("handleGetInfoRefs wait for subprocess", err) + return + } +} + +func handlePostRPC(w http.ResponseWriter, r *gitRequest, rpc string) { + var body io.ReadCloser + var err error + + // The client request body may have been gzipped. + if r.Header.Get("Content-Encoding") == "gzip" { + body, err = gzip.NewReader(r.Body) + if err != nil { + fail500(w, "handlePostRPC", err) + return + } + } else { + body = r.Body + } + defer body.Close() + + // Prepare our Git subprocess + cmd := gitCommand(r.GL_ID, "git", subCommand(rpc), "--stateless-rpc", r.RepoPath) + stdout, err := cmd.StdoutPipe() + if err != nil { + fail500(w, "handlePostRPC", err) + return + } + defer stdout.Close() + stdin, err := cmd.StdinPipe() + if err != nil { + fail500(w, "handlePostRPC", err) + return + } + defer stdin.Close() + if err := cmd.Start(); err != nil { + fail500(w, "handlePostRPC", err) + return + } + defer cleanUpProcessGroup(cmd) // Ensure brute force subprocess clean-up + + // Write the client request body to Git's standard input + if _, err := io.Copy(stdin, body); err != nil { + fail500(w, "handlePostRPC write to subprocess", err) + return + } + // Signal to the Git subprocess that no more data is coming + stdin.Close() + + // It may take a while before we return and the deferred closes happen + // so let's free up some resources already. + r.Body.Close() + // If the body was compressed, body != r.Body and this frees up the + // gzip.Reader. + body.Close() + + // Start writing the response + w.Header().Add("Content-Type", fmt.Sprintf("application/x-%s-result", rpc)) + w.Header().Add("Cache-Control", "no-cache") + w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return + + // This io.Copy may take a long time, both for Git push and pull. + if _, err := io.Copy(w, stdout); err != nil { + logContext("handlePostRPC read from subprocess", err) + return + } + if err := cmd.Wait(); err != nil { + logContext("handlePostRPC wait for subprocess", err) + return + } +} + +func subCommand(rpc string) string { + return strings.TrimPrefix(rpc, "git-") +} + +func pktLine(w io.Writer, s string) error { + _, err := fmt.Fprintf(w, "%04x%s", len(s)+4, s) + return err +} + +func pktFlush(w io.Writer) error { + _, err := fmt.Fprint(w, "0000") + return err +} diff --git a/githandler.go b/githandler.go index 406befce531f047828b244814dd480464599d7af..96885b36cfba0f3372cd4c9e4a1f44ab0e5e90ad 100644 --- a/githandler.go +++ b/githandler.go @@ -1,25 +1,19 @@ /* The gitHandler type implements http.Handler. -All code for handling Git HTTP requests is in this file. +In this file we handle request routing and interaction with the authBackend. */ package main import ( - "compress/gzip" "encoding/json" - "fmt" "io" - "io/ioutil" "log" "net/http" "os" - "os/exec" "path" "strings" - "syscall" - "time" ) type gitHandler struct { @@ -172,297 +166,3 @@ func (h *gitHandler) doAuthRequest(r *http.Request) (result *http.Response, err authReq.Header.Set("GitLab-Git-HTTP-Server", Version) return h.httpClient.Do(authReq) } - -func handleGetInfoRefs(w http.ResponseWriter, r *gitRequest, _ string) { - rpc := r.URL.Query().Get("service") - if !(rpc == "git-upload-pack" || rpc == "git-receive-pack") { - // The 'dumb' Git HTTP protocol is not supported - http.Error(w, "Not Found", 404) - return - } - - // Prepare our Git subprocess - cmd := gitCommand(r.GL_ID, "git", subCommand(rpc), "--stateless-rpc", "--advertise-refs", r.RepoPath) - stdout, err := cmd.StdoutPipe() - if err != nil { - fail500(w, "handleGetInfoRefs", err) - return - } - defer stdout.Close() - if err := cmd.Start(); err != nil { - fail500(w, "handleGetInfoRefs", err) - return - } - defer cleanUpProcessGroup(cmd) // Ensure brute force subprocess clean-up - - // Start writing the response - w.Header().Add("Content-Type", fmt.Sprintf("application/x-%s-advertisement", rpc)) - w.Header().Add("Cache-Control", "no-cache") - w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return - if err := pktLine(w, fmt.Sprintf("# service=%s\n", rpc)); err != nil { - logContext("handleGetInfoRefs response", err) - return - } - if err := pktFlush(w); err != nil { - logContext("handleGetInfoRefs response", err) - return - } - if _, err := io.Copy(w, stdout); err != nil { - logContext("handleGetInfoRefs read from subprocess", err) - return - } - if err := cmd.Wait(); err != nil { - logContext("handleGetInfoRefs wait for subprocess", err) - return - } -} - -func handleGetArchive(w http.ResponseWriter, r *gitRequest, format string) { - archiveFilename := path.Base(r.ArchivePath) - - if cachedArchive, err := os.Open(r.ArchivePath); err == nil { - defer cachedArchive.Close() - log.Printf("Serving cached file %q", r.ArchivePath) - setArchiveHeaders(w, format, archiveFilename) - // Even if somebody deleted the cachedArchive from disk since we opened - // the file, Unix file semantics guarantee we can still read from the - // open file in this process. - http.ServeContent(w, r.Request, "", time.Unix(0, 0), cachedArchive) - return - } - - // We assume the tempFile has a unique name so that concurrent requests are - // safe. We create the tempfile in the same directory as the final cached - // archive we want to create so that we can use an atomic link(2) operation - // to finalize the cached archive. - tempFile, err := prepareArchiveTempfile(path.Dir(r.ArchivePath), - archiveFilename) - if err != nil { - fail500(w, "handleGetArchive create tempfile for archive", err) - } - defer tempFile.Close() - defer os.Remove(tempFile.Name()) - - compressCmd, archiveFormat := parseArchiveFormat(format) - - archiveCmd := gitCommand("", "git", "--git-dir="+r.RepoPath, "archive", "--format="+archiveFormat, "--prefix="+r.ArchivePrefix+"/", r.CommitId) - archiveStdout, err := archiveCmd.StdoutPipe() - if err != nil { - fail500(w, "handleGetArchive", err) - return - } - defer archiveStdout.Close() - if err := archiveCmd.Start(); err != nil { - fail500(w, "handleGetArchive", err) - return - } - defer cleanUpProcessGroup(archiveCmd) // Ensure brute force subprocess clean-up - - var stdout io.ReadCloser - if compressCmd == nil { - stdout = archiveStdout - } else { - compressCmd.Stdin = archiveStdout - - stdout, err = compressCmd.StdoutPipe() - if err != nil { - fail500(w, "handleGetArchive compressCmd stdout pipe", err) - return - } - defer stdout.Close() - - if err := compressCmd.Start(); err != nil { - fail500(w, "handleGetArchive start compressCmd process", err) - return - } - defer compressCmd.Wait() - - archiveStdout.Close() - } - // Every Read() from stdout will be synchronously written to tempFile - // before it comes out the TeeReader. - archiveReader := io.TeeReader(stdout, tempFile) - - // Start writing the response - setArchiveHeaders(w, format, archiveFilename) - w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return - if _, err := io.Copy(w, archiveReader); err != nil { - logContext("handleGetArchive read from subprocess", err) - return - } - if err := archiveCmd.Wait(); err != nil { - logContext("handleGetArchive wait for archiveCmd", err) - return - } - if compressCmd != nil { - if err := compressCmd.Wait(); err != nil { - logContext("handleGetArchive wait for compressCmd", err) - return - } - } - - if err := finalizeCachedArchive(tempFile, r.ArchivePath); err != nil { - logContext("handleGetArchive finalize cached archive", err) - return - } -} - -func setArchiveHeaders(w http.ResponseWriter, format string, archiveFilename string) { - w.Header().Add("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, archiveFilename)) - if format == "zip" { - w.Header().Add("Content-Type", "application/zip") - } else { - w.Header().Add("Content-Type", "application/octet-stream") - } - w.Header().Add("Content-Transfer-Encoding", "binary") - w.Header().Add("Cache-Control", "private") -} - -func parseArchiveFormat(format string) (*exec.Cmd, string) { - switch format { - case "tar": - return nil, "tar" - case "tar.gz": - return exec.Command("gzip", "-c", "-n"), "tar" - case "tar.bz2": - return exec.Command("bzip2", "-c"), "tar" - case "zip": - return nil, "zip" - } - return nil, "unknown" -} - -func prepareArchiveTempfile(dir string, prefix string) (*os.File, error) { - if err := os.MkdirAll(dir, 0700); err != nil { - return nil, err - } - return ioutil.TempFile(dir, prefix) -} - -func finalizeCachedArchive(tempFile *os.File, archivePath string) error { - if err := tempFile.Close(); err != nil { - return err - } - return os.Link(tempFile.Name(), archivePath) -} - -func handlePostRPC(w http.ResponseWriter, r *gitRequest, rpc string) { - var body io.ReadCloser - var err error - - // The client request body may have been gzipped. - if r.Header.Get("Content-Encoding") == "gzip" { - body, err = gzip.NewReader(r.Body) - if err != nil { - fail500(w, "handlePostRPC", err) - return - } - } else { - body = r.Body - } - defer body.Close() - - // Prepare our Git subprocess - cmd := gitCommand(r.GL_ID, "git", subCommand(rpc), "--stateless-rpc", r.RepoPath) - stdout, err := cmd.StdoutPipe() - if err != nil { - fail500(w, "handlePostRPC", err) - return - } - defer stdout.Close() - stdin, err := cmd.StdinPipe() - if err != nil { - fail500(w, "handlePostRPC", err) - return - } - defer stdin.Close() - if err := cmd.Start(); err != nil { - fail500(w, "handlePostRPC", err) - return - } - defer cleanUpProcessGroup(cmd) // Ensure brute force subprocess clean-up - - // Write the client request body to Git's standard input - if _, err := io.Copy(stdin, body); err != nil { - fail500(w, "handlePostRPC write to subprocess", err) - return - } - // Signal to the Git subprocess that no more data is coming - stdin.Close() - - // It may take a while before we return and the deferred closes happen - // so let's free up some resources already. - r.Body.Close() - // If the body was compressed, body != r.Body and this frees up the - // gzip.Reader. - body.Close() - - // Start writing the response - w.Header().Add("Content-Type", fmt.Sprintf("application/x-%s-result", rpc)) - w.Header().Add("Cache-Control", "no-cache") - w.WriteHeader(200) // Don't bother with HTTP 500 from this point on, just return - - // This io.Copy may take a long time, both for Git push and pull. - if _, err := io.Copy(w, stdout); err != nil { - logContext("handlePostRPC read from subprocess", err) - return - } - if err := cmd.Wait(); err != nil { - logContext("handlePostRPC wait for subprocess", err) - return - } -} - -func fail500(w http.ResponseWriter, context string, err error) { - http.Error(w, "Internal server error", 500) - logContext(context, err) -} - -func logContext(context string, err error) { - log.Printf("%s: %v", context, err) -} - -// Git subprocess helpers -func subCommand(rpc string) string { - return strings.TrimPrefix(rpc, "git-") -} - -func gitCommand(gl_id string, name string, args ...string) *exec.Cmd { - cmd := exec.Command(name, args...) - // Start the command in its own process group (nice for signalling) - cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} - // Explicitly set the environment for the Git command - cmd.Env = []string{ - fmt.Sprintf("PATH=%s", os.Getenv("PATH")), - fmt.Sprintf("GL_ID=%s", gl_id), - } - // If we don't do something with cmd.Stderr, Git errors will be lost - cmd.Stderr = os.Stderr - return cmd -} - -func cleanUpProcessGroup(cmd *exec.Cmd) { - if cmd == nil { - return - } - - process := cmd.Process - if process != nil && process.Pid > 0 { - // Send SIGTERM to the process group of cmd - syscall.Kill(-process.Pid, syscall.SIGTERM) - } - - // reap our child process - cmd.Wait() -} - -// Git HTTP line protocol functions -func pktLine(w io.Writer, s string) error { - _, err := fmt.Fprintf(w, "%04x%s", len(s)+4, s) - return err -} - -func pktFlush(w io.Writer) error { - _, err := fmt.Fprint(w, "0000") - return err -} diff --git a/helpers.go b/helpers.go new file mode 100644 index 0000000000000000000000000000000000000000..d51160f3b2a28c93528ceedc5f549110b6c3866b --- /dev/null +++ b/helpers.go @@ -0,0 +1,53 @@ +/* +Miscellaneous helpers: logging, errors, subprocesses +*/ + +package main + +import ( + "fmt" + "log" + "net/http" + "os" + "os/exec" + "syscall" +) + +func fail500(w http.ResponseWriter, context string, err error) { + http.Error(w, "Internal server error", 500) + logContext(context, err) +} + +func logContext(context string, err error) { + log.Printf("%s: %v", context, err) +} + +// Git subprocess helpers +func gitCommand(gl_id string, name string, args ...string) *exec.Cmd { + cmd := exec.Command(name, args...) + // Start the command in its own process group (nice for signalling) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + // Explicitly set the environment for the Git command + cmd.Env = []string{ + fmt.Sprintf("PATH=%s", os.Getenv("PATH")), + fmt.Sprintf("GL_ID=%s", gl_id), + } + // If we don't do something with cmd.Stderr, Git errors will be lost + cmd.Stderr = os.Stderr + return cmd +} + +func cleanUpProcessGroup(cmd *exec.Cmd) { + if cmd == nil { + return + } + + process := cmd.Process + if process != nil && process.Pid > 0 { + // Send SIGTERM to the process group of cmd + syscall.Kill(-process.Pid, syscall.SIGTERM) + } + + // reap our child process + cmd.Wait() +} diff --git a/main.go b/main.go index d0bb5f4c8d5a086804dc1de76e4847f9e8b00cb5..4174b26072307edf3fae5c76cbb3c4507d6f4f10 100644 --- a/main.go +++ b/main.go @@ -1,5 +1,5 @@ /* -gitlab-workhorse handles 'smart' Git HTTP requests for GitLab +gitlab-workhorse handles slow requests for GitLab This HTTP server can service 'git clone', 'git push' etc. commands from Git clients that use the 'smart' Git HTTP protocol (git-upload-pack @@ -9,8 +9,7 @@ backend (for authentication and authorization) and local disk access to Git repositories managed by GitLab. In GitLab, this role was previously performed by gitlab-grack. -This file contains the main() function. Actual Git HTTP requests are handled by -the gitHandler type, implemented in githandler.go. +In this file we start the web server and hand off to the gitHandler type. */ package main diff --git a/main_test.go b/main_test.go index 029b9429dd39bb4eb71fa966dd1ac70b3430532d..2b6d4eef3ed93a9b4b1a1ef22ea0a8fb0e8c6f9f 100644 --- a/main_test.go +++ b/main_test.go @@ -269,7 +269,7 @@ func testAuthServer(code int, body string) *httptest.Server { } func startServerOrFail(t *testing.T, ts *httptest.Server) *exec.Cmd { - cmd := exec.Command("go", "run", "main.go", "githandler.go", fmt.Sprintf("-authBackend=%s", ts.URL), fmt.Sprintf("-listenAddr=%s", servAddr)) + cmd := exec.Command("go", "run", "main.go", "githandler.go", "archive.go", "git-http.go", "helpers.go", fmt.Sprintf("-authBackend=%s", ts.URL), fmt.Sprintf("-listenAddr=%s", servAddr)) cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr