From 5f79550a0dade6a6cebdf8cf62f55ffa6df30b30 Mon Sep 17 00:00:00 2001
From: KN4CK3R <admin@oldschoolhack.me>
Date: Wed, 21 Feb 2024 19:54:17 +0100
Subject: [PATCH] Prevent double use of `git cat-file` session. (#29298)

Fixes the reason why #29101 is hard to replicate.
Related #29297

Create a repo with a file with minimum size 4097 bytes (I use 10000) and
execute the following code:
```go
gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, <repo>)
assert.NoError(t, err)

commit, err := gitRepo.GetCommit(<sha>)
assert.NoError(t, err)

entry, err := commit.GetTreeEntryByPath(<file>)
assert.NoError(t, err)

b := entry.Blob()

// Create a reader
r, err := b.DataAsync()
assert.NoError(t, err)
defer r.Close()

// Create a second reader
r2, err := b.DataAsync()
assert.NoError(t, err) // Should be no error but is ErrNotExist
defer r2.Close()
```

The problem is the check in `CatFileBatch`:

https://github.com/go-gitea/gitea/blob/79217ea63c1f77de7ca79813ae45950724e63d02/modules/git/repo_base_nogogit.go#L81-L87
`Buffered() > 0` is used to check if there is a "operation" in progress
at the moment. This is a problem because we can't control the internal
buffer in the `bufio.Reader`. The code above demonstrates a sequence
which initiates an operation for which the code thinks there is no
active processing. The second call to `DataAsync()` therefore reuses the
existing instances instead of creating a new batch reader.

(cherry picked from commit f74c869221624092999097af38b6f7fae4701420)
---
 modules/git/repo_base_nogogit.go | 21 ++++++++++-----
 tests/integration/git_test.go    | 44 ++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go
index d5a350a926..8c6eae5897 100644
--- a/modules/git/repo_base_nogogit.go
+++ b/modules/git/repo_base_nogogit.go
@@ -27,10 +27,12 @@ type Repository struct {
 
 	gpgSettings *GPGSettings
 
+	batchInUse  bool
 	batchCancel context.CancelFunc
 	batchReader *bufio.Reader
 	batchWriter WriteCloserError
 
+	checkInUse  bool
 	checkCancel context.CancelFunc
 	checkReader *bufio.Reader
 	checkWriter WriteCloserError
@@ -79,23 +81,28 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) {
 
 // CatFileBatch obtains a CatFileBatch for this repository
 func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) {
-	if repo.batchCancel == nil || repo.batchReader.Buffered() > 0 {
+	if repo.batchCancel == nil || repo.batchInUse {
 		log.Debug("Opening temporary cat file batch for: %s", repo.Path)
 		return CatFileBatch(ctx, repo.Path)
 	}
-	return repo.batchWriter, repo.batchReader, func() {}
+	repo.batchInUse = true
+	return repo.batchWriter, repo.batchReader, func() {
+		repo.batchInUse = false
+	}
 }
 
 // CatFileBatchCheck obtains a CatFileBatchCheck for this repository
 func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) {
-	if repo.checkCancel == nil || repo.checkReader.Buffered() > 0 {
-		log.Debug("Opening temporary cat file batch-check: %s", repo.Path)
+	if repo.checkCancel == nil || repo.checkInUse {
+		log.Debug("Opening temporary cat file batch-check for: %s", repo.Path)
 		return CatFileBatchCheck(ctx, repo.Path)
 	}
-	return repo.checkWriter, repo.checkReader, func() {}
+	repo.checkInUse = true
+	return repo.checkWriter, repo.checkReader, func() {
+		repo.checkInUse = false
+	}
 }
 
-// Close this repository, in particular close the underlying gogitStorage if this is not nil
 func (repo *Repository) Close() (err error) {
 	if repo == nil {
 		return nil
@@ -105,12 +112,14 @@ func (repo *Repository) Close() (err error) {
 		repo.batchReader = nil
 		repo.batchWriter = nil
 		repo.batchCancel = nil
+		repo.batchInUse = false
 	}
 	if repo.checkCancel != nil {
 		repo.checkCancel()
 		repo.checkCancel = nil
 		repo.checkReader = nil
 		repo.checkWriter = nil
+		repo.checkInUse = false
 	}
 	repo.LastCommitCache = nil
 	repo.tagCache = nil
diff --git a/tests/integration/git_test.go b/tests/integration/git_test.go
index 365e7f91b1..ff5421d93a 100644
--- a/tests/integration/git_test.go
+++ b/tests/integration/git_test.go
@@ -4,6 +4,7 @@
 package integration
 
 import (
+	"bytes"
 	"encoding/hex"
 	"fmt"
 	"math/rand"
@@ -26,9 +27,11 @@ import (
 	user_model "code.gitea.io/gitea/models/user"
 	gitea_context "code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/gitrepo"
 	"code.gitea.io/gitea/modules/lfs"
 	"code.gitea.io/gitea/modules/setting"
 	api "code.gitea.io/gitea/modules/structs"
+	files_service "code.gitea.io/gitea/services/repository/files"
 	"code.gitea.io/gitea/tests"
 
 	"github.com/stretchr/testify/assert"
@@ -1034,3 +1037,44 @@ func doCreateAgitFlowPull(dstPath string, ctx *APITestContext, baseBranch, headB
 		t.Run("CheckoutMasterAgain", doGitCheckoutBranch(dstPath, "master"))
 	}
 }
+
+func TestDataAsync_Issue29101(t *testing.T) {
+	onGiteaRun(t, func(t *testing.T, u *url.URL) {
+		user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
+		repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})
+
+		resp, err := files_service.ChangeRepoFiles(db.DefaultContext, repo, user, &files_service.ChangeRepoFilesOptions{
+			Files: []*files_service.ChangeRepoFile{
+				{
+					Operation:     "create",
+					TreePath:      "test.txt",
+					ContentReader: bytes.NewReader(make([]byte, 10000)),
+				},
+			},
+			OldBranch: repo.DefaultBranch,
+			NewBranch: repo.DefaultBranch,
+		})
+		assert.NoError(t, err)
+
+		sha := resp.Commit.SHA
+
+		gitRepo, err := gitrepo.OpenRepository(db.DefaultContext, repo)
+		assert.NoError(t, err)
+
+		commit, err := gitRepo.GetCommit(sha)
+		assert.NoError(t, err)
+
+		entry, err := commit.GetTreeEntryByPath("test.txt")
+		assert.NoError(t, err)
+
+		b := entry.Blob()
+
+		r, err := b.DataAsync()
+		assert.NoError(t, err)
+		defer r.Close()
+
+		r2, err := b.DataAsync()
+		assert.NoError(t, err)
+		defer r2.Close()
+	})
+}