From d5319feb85c6f9f1e6f05bed0fda049d6eafca62 Mon Sep 17 00:00:00 2001
From: 6543 <m.huber@kithara.com>
Date: Sat, 16 Mar 2024 11:32:45 +0100
Subject: [PATCH] Refactor code_indexer to use a SearchOptions struct for
 PerformSearch (#29724)

This is similar to how it is already done for the issue_indexer.

---
*Sponsored by Kithara Software GmbH*

Conflicts:
	routers/web/repo/search.go
---
 models/issues/issue_update.go                 |  2 +-
 modules/indexer/code/bleve/bleve.go           | 26 +++++++++----------
 .../code/elasticsearch/elasticsearch.go       | 26 ++++++++-----------
 modules/indexer/code/git.go                   |  2 +-
 modules/indexer/code/indexer_test.go          | 11 +++++++-
 modules/indexer/code/internal/indexer.go      | 15 +++++++++--
 modules/indexer/code/search.go                |  8 +++---
 routers/web/explore/code.go                   | 12 ++++++++-
 routers/web/repo/search.go                    | 13 ++++++++--
 routers/web/user/code.go                      | 12 ++++++++-
 10 files changed, 87 insertions(+), 40 deletions(-)

diff --git a/models/issues/issue_update.go b/models/issues/issue_update.go
index a0cf92c3ad..f20d552a1b 100644
--- a/models/issues/issue_update.go
+++ b/models/issues/issue_update.go
@@ -173,7 +173,7 @@ func ChangeIssueTitle(ctx context.Context, issue *Issue, doer *user_model.User,
 		return fmt.Errorf("createComment: %w", err)
 	}
 	if err = issue.AddCrossReferences(ctx, doer, true); err != nil {
-		return err
+		return fmt.Errorf("addCrossReferences: %w", err)
 	}
 
 	return committer.Commit()
diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index 107dd23598..d7f735e957 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
 			return err
 		}
 		if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
-			return fmt.Errorf("Misformatted git cat-file output: %w", err)
+			return fmt.Errorf("misformatted git cat-file output: %w", err)
 		}
 	}
 
@@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {
 
 // Search searches for files in the specified repo.
 // Returns the matching file-paths
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
+func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
 	var (
 		indexerQuery query.Query
 		keywordQuery query.Query
 	)
 
-	if isFuzzy {
-		phraseQuery := bleve.NewMatchPhraseQuery(keyword)
+	if opts.IsKeywordFuzzy {
+		phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
 		phraseQuery.FieldVal = "Content"
 		phraseQuery.Analyzer = repoIndexerAnalyzer
 		keywordQuery = phraseQuery
 	} else {
-		prefixQuery := bleve.NewPrefixQuery(keyword)
+		prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
 		prefixQuery.FieldVal = "Content"
 		keywordQuery = prefixQuery
 	}
 
-	if len(repoIDs) > 0 {
-		repoQueries := make([]query.Query, 0, len(repoIDs))
-		for _, repoID := range repoIDs {
+	if len(opts.RepoIDs) > 0 {
+		repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
+		for _, repoID := range opts.RepoIDs {
 			repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
 		}
 
@@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
 
 	// Save for reuse without language filter
 	facetQuery := indexerQuery
-	if len(language) > 0 {
-		languageQuery := bleve.NewMatchQuery(language)
+	if len(opts.Language) > 0 {
+		languageQuery := bleve.NewMatchQuery(opts.Language)
 		languageQuery.FieldVal = "Language"
 		languageQuery.Analyzer = analyzer_keyword.Name
 
@@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
 		)
 	}
 
-	from := (page - 1) * pageSize
+	from, pageSize := opts.GetSkipTake()
 	searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
 	searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
 	searchRequest.IncludeLocations = true
 
-	if len(language) == 0 {
+	if len(opts.Language) == 0 {
 		searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
 	}
 
@@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
 	}
 
 	searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
-	if len(language) > 0 {
+	if len(opts.Language) > 0 {
 		// Use separate query to go get all language counts
 		facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
 		facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go
index 065b0b2061..e4622fd66e 100644
--- a/modules/indexer/code/elasticsearch/elasticsearch.go
+++ b/modules/indexer/code/elasticsearch/elasticsearch.go
@@ -281,18 +281,18 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
 }
 
 // Search searches for codes and language stats by given conditions.
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
+func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
 	searchType := esMultiMatchTypePhrasePrefix
-	if isFuzzy {
+	if opts.IsKeywordFuzzy {
 		searchType = esMultiMatchTypeBestFields
 	}
 
-	kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
+	kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
 	query := elastic.NewBoolQuery()
 	query = query.Must(kwQuery)
-	if len(repoIDs) > 0 {
-		repoStrs := make([]any, 0, len(repoIDs))
-		for _, repoID := range repoIDs {
+	if len(opts.RepoIDs) > 0 {
+		repoStrs := make([]any, 0, len(opts.RepoIDs))
+		for _, repoID := range opts.RepoIDs {
 			repoStrs = append(repoStrs, repoID)
 		}
 		repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
@@ -300,16 +300,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
 	}
 
 	var (
-		start       int
-		kw          = "<em>" + keyword + "</em>"
-		aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
+		start, pageSize = opts.GetSkipTake()
+		kw              = "<em>" + opts.Keyword + "</em>"
+		aggregation     = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
 	)
 
-	if page > 0 {
-		start = (page - 1) * pageSize
-	}
-
-	if len(language) == 0 {
+	if len(opts.Language) == 0 {
 		searchResult, err := b.inner.Client.Search().
 			Index(b.inner.VersionedIndexName()).
 			Aggregation("language", aggregation).
@@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
 		return convertResult(searchResult, kw, pageSize)
 	}
 
-	langQuery := elastic.NewMatchQuery("language", language)
+	langQuery := elastic.NewMatchQuery("language", opts.Language)
 	countResult, err := b.inner.Client.Search().
 		Index(b.inner.VersionedIndexName()).
 		Aggregation("language", aggregation).
diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go
index 76cd78e11e..ea621c4e43 100644
--- a/modules/indexer/code/git.go
+++ b/modules/indexer/code/git.go
@@ -32,7 +32,7 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
 
 	needGenesis := len(status.CommitSha) == 0
 	if !needGenesis {
-		hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
+		hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
 		stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
 		needGenesis = len(stdout) == 0
 	}
diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go
index 23dbd63410..8975c5ce40 100644
--- a/modules/indexer/code/indexer_test.go
+++ b/modules/indexer/code/indexer_test.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"testing"
 
+	"code.gitea.io/gitea/models/db"
 	"code.gitea.io/gitea/models/unittest"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/indexer/code/bleve"
@@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
 
 		for _, kw := range keywords {
 			t.Run(kw.Keyword, func(t *testing.T) {
-				total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
+				total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
+					RepoIDs: kw.RepoIDs,
+					Keyword: kw.Keyword,
+					Paginator: &db.ListOptions{
+						Page:     1,
+						PageSize: 10,
+					},
+					IsKeywordFuzzy: true,
+				})
 				assert.NoError(t, err)
 				assert.Len(t, kw.IDs, int(total))
 				assert.Len(t, langs, kw.Langs)
diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go
index c92419deb2..c259fcd26e 100644
--- a/modules/indexer/code/internal/indexer.go
+++ b/modules/indexer/code/internal/indexer.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"fmt"
 
+	"code.gitea.io/gitea/models/db"
 	repo_model "code.gitea.io/gitea/models/repo"
 	"code.gitea.io/gitea/modules/indexer/internal"
 )
@@ -16,7 +17,17 @@ type Indexer interface {
 	internal.Indexer
 	Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
 	Delete(ctx context.Context, repoID int64) error
-	Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
+	Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
+}
+
+type SearchOptions struct {
+	RepoIDs  []int64
+	Keyword  string
+	Language string
+
+	IsKeywordFuzzy bool
+
+	db.Paginator
 }
 
 // NewDummyIndexer returns a dummy indexer
@@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
 	return fmt.Errorf("indexer is not ready")
 }
 
-func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
+func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
 	return 0, nil, nil, fmt.Errorf("indexer is not ready")
 }
diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go
index 89a62a8d3e..51c7595cf8 100644
--- a/modules/indexer/code/search.go
+++ b/modules/indexer/code/search.go
@@ -32,6 +32,8 @@ type ResultLine struct {
 
 type SearchResultLanguages = internal.SearchResultLanguages
 
+type SearchOptions = internal.SearchOptions
+
 func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
 	startIndex := selectionStartIndex
 	numLinesBefore := 0
@@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
 
 // PerformSearch perform a search on a repository
 // if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
-func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
-	if len(keyword) == 0 {
+func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
+	if opts == nil || len(opts.Keyword) == 0 {
 		return 0, nil, nil, nil
 	}
 
-	total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
+	total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts)
 	if err != nil {
 		return 0, nil, nil, err
 	}
diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go
index a6bc71ac9c..75bd0f3d24 100644
--- a/routers/web/explore/code.go
+++ b/routers/web/explore/code.go
@@ -6,6 +6,7 @@ package explore
 import (
 	"net/http"
 
+	"code.gitea.io/gitea/models/db"
 	repo_model "code.gitea.io/gitea/models/repo"
 	"code.gitea.io/gitea/modules/base"
 	code_indexer "code.gitea.io/gitea/modules/indexer/code"
@@ -77,7 +78,16 @@ func Code(ctx *context.Context) {
 	)
 
 	if (len(repoIDs) > 0) || isAdmin {
-		total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
+		total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
+			RepoIDs:        repoIDs,
+			Keyword:        keyword,
+			IsKeywordFuzzy: isFuzzy,
+			Language:       language,
+			Paginator: &db.ListOptions{
+				Page:     page,
+				PageSize: setting.UI.RepoSearchPagingNum,
+			},
+		})
 		if err != nil {
 			if code_indexer.IsAvailable(ctx) {
 				ctx.ServerError("SearchResults", err)
diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go
index 550a3dc8be..d22a691a70 100644
--- a/routers/web/repo/search.go
+++ b/routers/web/repo/search.go
@@ -6,6 +6,7 @@ package repo
 import (
 	"net/http"
 
+	"code.gitea.io/gitea/models/db"
 	"code.gitea.io/gitea/modules/base"
 	code_indexer "code.gitea.io/gitea/modules/indexer/code"
 	"code.gitea.io/gitea/modules/setting"
@@ -43,8 +44,16 @@ func Search(ctx *context.Context) {
 	if setting.Indexer.RepoIndexerEnabled {
 		ctx.Data["CodeIndexerEnabled"] = true
 
-		total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
-			language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
+		total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
+			RepoIDs:        []int64{ctx.Repo.Repository.ID},
+			Keyword:        keyword,
+			IsKeywordFuzzy: isFuzzy,
+			Language:       language,
+			Paginator: &db.ListOptions{
+				Page:     page,
+				PageSize: setting.UI.RepoSearchPagingNum,
+			},
+		})
 		if err != nil {
 			if code_indexer.IsAvailable(ctx) {
 				ctx.ServerError("SearchResults", err)
diff --git a/routers/web/user/code.go b/routers/web/user/code.go
index 8613d38b65..d2afdd8905 100644
--- a/routers/web/user/code.go
+++ b/routers/web/user/code.go
@@ -6,6 +6,7 @@ package user
 import (
 	"net/http"
 
+	"code.gitea.io/gitea/models/db"
 	repo_model "code.gitea.io/gitea/models/repo"
 	"code.gitea.io/gitea/modules/base"
 	code_indexer "code.gitea.io/gitea/modules/indexer/code"
@@ -75,7 +76,16 @@ func CodeSearch(ctx *context.Context) {
 	)
 
 	if len(repoIDs) > 0 {
-		total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
+		total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
+			RepoIDs:        repoIDs,
+			Keyword:        keyword,
+			IsKeywordFuzzy: isFuzzy,
+			Language:       language,
+			Paginator: &db.ListOptions{
+				Page:     page,
+				PageSize: setting.UI.RepoSearchPagingNum,
+			},
+		})
 		if err != nil {
 			if code_indexer.IsAvailable(ctx) {
 				ctx.ServerError("SearchResults", err)