From 51fb6f3983f15aa3c2db1feadcc13db1692315ec Mon Sep 17 00:00:00 2001
From: Shiny Nematoda <snematoda.751k2@aleeas.com>
Date: Tue, 20 Feb 2024 11:05:42 +0000
Subject: [PATCH] [FEAT] add fallback repo search using git grep

---
 routers/web/repo/search.go               | 58 +++++++++------
 services/repository/files/search.go      | 90 ++++++++++++++++++++++++
 services/repository/files/search_test.go | 48 +++++++++++++
 templates/repo/home.tmpl                 | 30 ++++----
 templates/repo/search.tmpl               | 20 +++---
 tests/integration/repo_search_test.go    | 63 ++++++++++++-----
 6 files changed, 246 insertions(+), 63 deletions(-)
 create mode 100644 services/repository/files/search.go
 create mode 100644 services/repository/files/search_test.go

diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go
index 3c0fa4bc00..29b3b7b476 100644
--- a/routers/web/repo/search.go
+++ b/routers/web/repo/search.go
@@ -10,17 +10,13 @@ import (
 	"code.gitea.io/gitea/modules/context"
 	code_indexer "code.gitea.io/gitea/modules/indexer/code"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/services/repository/files"
 )
 
 const tplSearch base.TplName = "repo/search"
 
 // Search render repository search page
 func Search(ctx *context.Context) {
-	if !setting.Indexer.RepoIndexerEnabled {
-		ctx.Redirect(ctx.Repo.RepoLink)
-		return
-	}
-
 	language := ctx.FormTrim("l")
 	keyword := ctx.FormTrim("q")
 
@@ -37,31 +33,65 @@ func Search(ctx *context.Context) {
 		return
 	}
 
+	ctx.Data["SourcePath"] = ctx.Repo.Repository.Link()
+
 	page := ctx.FormInt("page")
 	if page <= 0 {
 		page = 1
 	}
 
-	total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
-		language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
-	if err != nil {
-		if code_indexer.IsAvailable(ctx) {
-			ctx.ServerError("SearchResults", err)
+	// Use the code indexer when it is enabled, otherwise fall back to git grep.
+	if setting.Indexer.RepoIndexerEnabled {
+		ctx.Data["CodeIndexerEnabled"] = true
+
+		total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
+			language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
+		if err != nil {
+			if code_indexer.IsAvailable(ctx) {
+				ctx.ServerError("SearchResults", err)
+				return
+			}
+			ctx.Data["CodeIndexerUnavailable"] = true
+		} else {
+			ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
+		}
+
+		ctx.Data["SearchResults"] = searchResults
+		ctx.Data["SearchResultLanguages"] = searchResultLanguages
+
+		pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
+		pager.SetDefaultParams(ctx)
+		pager.AddParam(ctx, "l", "Language")
+		ctx.Data["Page"] = pager
+	} else {
+		data, err := files.NewRepoGrep(ctx, ctx.Repo.Repository, keyword)
+		if err != nil {
+			ctx.ServerError("NewRepoGrep", err)
 			return
 		}
-		ctx.Data["CodeIndexerUnavailable"] = true
-	} else {
-		ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
+
+		// NewRepoGrep hands back every match at once, so cut out the requested
+		// page here; the pager below is still built from the full match count.
+		total := len(data)
+		if pageSize := setting.UI.RepoSearchPagingNum; pageSize > 0 {
+			start := (page - 1) * pageSize
+			if start < 0 || start > total {
+				start = total // past the last page (or int overflow): show nothing
+			}
+			end := start + pageSize
+			if end < start || end > total {
+				end = total
+			}
+			data = data[start:end]
+		}
+
+		ctx.Data["CodeIndexerEnabled"] = false
+		ctx.Data["SearchResults"] = data
+
+		pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
+		pager.SetDefaultParams(ctx)
+		ctx.Data["Page"] = pager
 	}
 
-	ctx.Data["SourcePath"] = ctx.Repo.Repository.Link()
-	ctx.Data["SearchResults"] = searchResults
-	ctx.Data["SearchResultLanguages"] = searchResultLanguages
-
-	pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
-	pager.SetDefaultParams(ctx)
-	pager.AddParam(ctx, "l", "Language")
-	ctx.Data["Page"] = pager
-
 	ctx.HTML(http.StatusOK, tplSearch)
 }
diff --git a/services/repository/files/search.go b/services/repository/files/search.go
new file mode 100644
index 0000000000..f8317c4892
--- /dev/null
+++ b/services/repository/files/search.go
@@ -0,0 +1,113 @@
+package files
+
+import (
+	"context"
+	"html/template"
+	"strconv"
+	"strings"
+
+	repo_model "code.gitea.io/gitea/models/repo"
+	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/gitrepo"
+	"code.gitea.io/gitea/modules/highlight"
+	"code.gitea.io/gitea/modules/timeutil"
+
+	"github.com/go-enry/go-enry/v2"
+)
+
+// Result is the set of matches NewRepoGrep found in a single file. It carries
+// the fields the repo/search template reads for each search result.
+type Result struct {
+	RepoID         int64 // ignored
+	Filename       string
+	CommitID       string             // branch
+	UpdatedUnix    timeutil.TimeStamp // ignored
+	Language       string
+	Color          string
+	LineNumbers    []int64
+	FormattedLines template.HTML
+}
+
+// pHEAD is the prefix git grep puts in front of each file name when the
+// search target is the HEAD tree.
+const pHEAD = "HEAD:"
+
+// NewRepoGrep searches the tree at HEAD of repo for keyword using "git grep"
+// and returns one Result per file that contains a match.
+//
+// keyword is matched as a fixed string, ignoring case, and every match comes
+// with one line of context before and after it. An error is returned only
+// when the repository cannot be opened; if git grep exits with an error,
+// which it also does when nothing matches, the result is an empty, non-nil
+// slice and a nil error.
+func NewRepoGrep(ctx context.Context, repo *repo_model.Repository, keyword string) ([]*Result, error) {
+	t, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
+	if err != nil {
+		return nil, err
+	}
+	// Always release the handle: discarding the closer leaks the repository
+	// whenever it had to be opened for this call.
+	defer closer.Close()
+
+	data := []*Result{}
+
+	stdout, _, err := git.NewCommand(ctx,
+		"grep",
+		"-1", // n before and after lines
+		"-z",
+		"--heading",
+		"--break",         // easier parsing
+		"--fixed-strings", // disallow regex for now
+		"-n",              // line nums
+		"-i",              // ignore case
+		"--full-name",     // full file path, rel to repo
+		//"--column",        // for adding better highlighting support
+	).
+		AddDynamicArguments(keyword).
+		AddArguments("HEAD").
+		RunStdString(&git.RunOpts{Dir: t.Path})
+	if err != nil {
+		// git grep exits non-zero when there are no results.
+		// NOTE(review): genuine git failures are reported as "no results"
+		// too; consider telling exit status 1 apart from other errors.
+		return data, nil
+	}
+
+	// --break puts an empty line between files, so every block is one file.
+	for _, block := range strings.Split(stdout, "\n\n") {
+		// The result links point at the default branch, hence the branch name here.
+		res := Result{CommitID: repo.DefaultBranch}
+		code := []string{}
+
+		for _, line := range strings.Split(block, "\n") {
+			// --heading prints the file name once per block, as "HEAD:<path>".
+			if strings.HasPrefix(line, pHEAD) {
+				res.Filename = strings.TrimPrefix(line, pHEAD)
+				continue
+			}
+
+			// Match and context lines arrive as "<line number>\x00<text>"; lines
+			// without a NUL (e.g. the "--" separator between hunks) are skipped.
+			if ln, after, ok := strings.Cut(line, "\x00"); ok {
+				i, err := strconv.ParseInt(ln, 10, 64)
+				if err != nil {
+					continue
+				}
+
+				res.LineNumbers = append(res.LineNumbers, i)
+				code = append(code, after)
+			}
+		}
+
+		if res.Filename == "" || len(code) == 0 || len(res.LineNumbers) == 0 {
+			continue
+		}
+
+		res.FormattedLines, res.Language = highlight.Code(res.Filename, "", strings.Join(code, "\n"))
+		res.Color = enry.GetColor(res.Language)
+
+		data = append(data, &res)
+	}
+
+	return data, nil
+}
diff --git a/services/repository/files/search_test.go b/services/repository/files/search_test.go
new file mode 100644
index 0000000000..c24bb731a8
--- /dev/null
+++ b/services/repository/files/search_test.go
@@ -0,0 +1,51 @@
+package files
+
+import (
+	"testing"
+
+	"code.gitea.io/gitea/models/unittest"
+	"code.gitea.io/gitea/modules/contexttest"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestNewRepoGrep runs NewRepoGrep against the user2/repo1 fixture: once with
+// a keyword that matches README.md and once with a keyword that matches
+// nothing.
+func TestNewRepoGrep(t *testing.T) {
+	unittest.PrepareTestEnv(t)
+	ctx, _ := contexttest.MockContext(t, "user2/repo1")
+	ctx.SetParams(":id", "1")
+	contexttest.LoadRepo(t, ctx, 1)
+	contexttest.LoadRepoCommit(t, ctx)
+	contexttest.LoadUser(t, ctx, 2)
+	contexttest.LoadGitRepo(t, ctx)
+	defer ctx.Repo.GitRepo.Close()
+
+	t.Run("with result", func(t *testing.T) {
+		res, err := NewRepoGrep(ctx, ctx.Repo.Repository, "Description")
+		assert.NoError(t, err)
+
+		expected := []*Result{
+			{
+				RepoID:         0,
+				Filename:       "README.md",
+				CommitID:       "master",
+				UpdatedUnix:    0,
+				Language:       "Markdown",
+				Color:          "#083fa1",
+				LineNumbers:    []int64{2, 3},
+				FormattedLines: "\nDescription for repo1",
+			},
+		}
+
+		assert.EqualValues(t, expected, res)
+	})
+
+	t.Run("empty result", func(t *testing.T) {
+		res, err := NewRepoGrep(ctx, ctx.Repo.Repository, "keyword that does not match in the repo")
+		assert.NoError(t, err)
+
+		assert.EqualValues(t, []*Result{}, res)
+	})
+}
diff --git a/templates/repo/home.tmpl b/templates/repo/home.tmpl
index 5e27d9160c..9bac26ce1e 100644
--- a/templates/repo/home.tmpl
+++ b/templates/repo/home.tmpl
@@ -11,23 +11,21 @@
 				{{if $description}}<span class="description">{{$description | RenderCodeBlock}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{ctx.Locale.Tr "repo.no_desc"}}</span>{{end}}
 				<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
 			</div>
-			{{if .RepoSearchEnabled}}
-				<div class="ui repo-search">
-					<form class="ui form ignore-dirty" action="{{.RepoLink}}/search" method="get">
-						<div class="field">
-							<div class="ui small action input{{if .CodeIndexerUnavailable}} disabled left icon{{end}}"{{if .CodeIndexerUnavailable}} data-tooltip-content="{{ctx.Locale.Tr "repo.search.code_search_unavailable"}}"{{end}}>
-								<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "repo.search.search_repo"}}">
-								{{if .CodeIndexerUnavailable}}
-									<i class="icon">{{svg "octicon-alert"}}</i>
-								{{end}}
-								<button class="ui small icon button"{{if .CodeIndexerUnavailable}} disabled{{end}} type="submit">
-									{{svg "octicon-search"}}
-								</button>
-							</div>
+			<div class="ui repo-search">
+				<form class="ui form ignore-dirty" action="{{.RepoLink}}/search" method="get">
+					<div class="field">
+						<div class="ui small action input{{if .CodeIndexerUnavailable}} disabled left icon{{end}}"{{if .CodeIndexerUnavailable}} data-tooltip-content="{{ctx.Locale.Tr "repo.search.code_search_unavailable"}}"{{end}}>
+							<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "repo.search.search_repo"}}">
+							{{if .CodeIndexerUnavailable}}
+								<i class="icon">{{svg "octicon-alert"}}</i>
+							{{end}}
+							<button class="ui small icon button"{{if .CodeIndexerUnavailable}} disabled{{end}} type="submit">
+								{{svg "octicon-search"}}
+							</button>
 						</div>
-					</form>
-				</div>
-			{{end}}
+					</div>
+				</form>
+			</div>
 		</div>
 		<div class="gt-df gt-ac gt-fw gt-gap-2" id="repo-topics">
 			{{range .Topics}}<a class="ui repo-topic large label topic gt-m-0" href="{{AppSubUrl}}/explore/repos?q={{.Name}}&topic=1">{{.Name}}</a>{{end}}
diff --git a/templates/repo/search.tmpl b/templates/repo/search.tmpl
index b616b4de32..3b5c212af3 100644
--- a/templates/repo/search.tmpl
+++ b/templates/repo/search.tmpl
@@ -6,14 +6,16 @@
 			<form class="ui form ignore-dirty" method="get">
 				<div class="ui fluid action input">
 					<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "repo.search.search_repo"}}">
-					<div class="ui dropdown selection {{if .CodeIndexerUnavailable}} disabled{{end}}" data-tooltip-content="{{ctx.Locale.Tr "repo.search.type.tooltip"}}">
-						<input name="t" type="hidden"{{if .CodeIndexerUnavailable}} disabled{{end}} value="{{.queryType}}">{{svg "octicon-triangle-down" 14 "dropdown icon"}}
-						<div class="text">{{ctx.Locale.Tr (printf "repo.search.%s" (or .queryType "fuzzy"))}}</div>
-						<div class="menu">
-							<div class="item" data-value="" data-tooltip-content="{{ctx.Locale.Tr "repo.search.fuzzy.tooltip"}}">{{ctx.Locale.Tr "repo.search.fuzzy"}}</div>
-							<div class="item" data-value="match" data-tooltip-content="{{ctx.Locale.Tr "repo.search.match.tooltip"}}">{{ctx.Locale.Tr "repo.search.match"}}</div>
+					{{if .CodeIndexerEnabled}}
+						<div class="ui dropdown selection {{if .CodeIndexerUnavailable}} disabled{{end}}" data-tooltip-content="{{ctx.Locale.Tr "repo.search.type.tooltip"}}">
+							<input name="t" type="hidden"{{if .CodeIndexerUnavailable}} disabled{{end}} value="{{.queryType}}">{{svg "octicon-triangle-down" 14 "dropdown icon"}}
+							<div class="text">{{ctx.Locale.Tr (printf "repo.search.%s" (or .queryType "fuzzy"))}}</div>
+							<div class="menu">
+								<div class="item" data-value="" data-tooltip-content="{{ctx.Locale.Tr "repo.search.fuzzy.tooltip"}}">{{ctx.Locale.Tr "repo.search.fuzzy"}}</div>
+								<div class="item" data-value="match" data-tooltip-content="{{ctx.Locale.Tr "repo.search.match.tooltip"}}">{{ctx.Locale.Tr "repo.search.match"}}</div>
+							</div>
 						</div>
-					</div>
+					{{end}}
 					<button class="ui icon button"{{if .CodeIndexerUnavailable}} disabled{{end}} type="submit">{{svg "octicon-search" 16}}</button>
 				</div>
 			</form>
@@ -41,7 +43,7 @@
 						<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
 							<h4 class="ui top attached normal header gt-df gt-fw">
 								<span class="file gt-f1">{{.Filename}}</span>
-								<a role="button" class="ui basic tiny button" rel="nofollow" href="{{$.SourcePath}}/src/commit/{{PathEscape $result.CommitID}}/{{PathEscapeSegments .Filename}}">{{ctx.Locale.Tr "repo.diff.view_file"}}</a>
+								<a role="button" class="ui basic tiny button" rel="nofollow" href="{{$.SourcePath}}/src/{{if $.CodeIndexerEnabled}}commit{{else}}branch{{end}}/{{PathEscape $result.CommitID}}/{{PathEscapeSegments .Filename}}">{{ctx.Locale.Tr "repo.diff.view_file"}}</a>
 							</h4>
 							<div class="ui attached table segment">
 								<div class="file-body file-code code-view">
@@ -50,7 +52,7 @@
 											<tr>
 												<td class="lines-num">
 													{{range .LineNumbers}}
-														<a href="{{$.SourcePath}}/src/commit/{{PathEscape $result.CommitID}}/{{PathEscapeSegments $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
+														<a href="{{$.SourcePath}}/src/{{if $.CodeIndexerEnabled}}commit{{else}}branch{{end}}/{{PathEscape $result.CommitID}}/{{PathEscapeSegments $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
 													{{end}}
 												</td>
 												<td class="lines-code chroma"><code class="code-inner">{{.FormattedLines}}</code></td>
diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go
index cf199e98c2..e5ee334ce8 100644
--- a/tests/integration/repo_search_test.go
+++ b/tests/integration/repo_search_test.go
@@ -11,14 +11,15 @@ import (
 	repo_model "code.gitea.io/gitea/models/repo"
 	code_indexer "code.gitea.io/gitea/modules/indexer/code"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/test"
 	"code.gitea.io/gitea/tests"
 
 	"github.com/PuerkitoBio/goquery"
 	"github.com/stretchr/testify/assert"
 )
 
-func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
-	filenameSelections := doc.doc.Find(".repository.search").Find(".repo-search-result").Find(".header").Find("span.file")
+func resultFilenames(t testing.TB, doc *goquery.Selection) []string {
+	filenameSelections := doc.Find(".header").Find("span.file")
 	result := make([]string, filenameSelections.Length())
 	filenameSelections.Each(func(i int, selection *goquery.Selection) {
 		result[i] = selection.Text()
@@ -26,36 +27,78 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
 	return result
 }
 
-func TestSearchRepo(t *testing.T) {
+// checkResultLinks asserts that the href of every link found in doc contains
+// substr.
+func checkResultLinks(t *testing.T, substr string, doc *goquery.Selection) {
+	t.Helper()
+	linkSelections := doc.Find("a[href]")
+	linkSelections.Each(func(i int, selection *goquery.Selection) {
+		assert.Contains(t, selection.AttrOr("href", ""), substr)
+	})
+}
+
+// testSearchRepo requests the search page of user2/repo1 with the code indexer
+// switched on or off and checks the matched files as well as the kind of link
+// (commit or default branch) the results point to. The include/exclude glob
+// cases are only run with the indexer.
+func testSearchRepo(t *testing.T, useExternalIndexer bool) {
 	defer tests.PrepareTestEnv(t)()
+	defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, useExternalIndexer)()
 
 	repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
 	assert.NoError(t, err)
 
-	executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
+	gitReference := "/branch/" + repo.DefaultBranch
 
-	testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
+	if useExternalIndexer {
+		gitReference = "/commit/"
+		executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
+	}
 
-	setting.Indexer.IncludePatterns = setting.IndexerGlobFromString("**.txt")
-	setting.Indexer.ExcludePatterns = setting.IndexerGlobFromString("**/y/**")
+	testSearch(t, "/user2/repo1/search?q=Description&page=1", gitReference, []string{"README.md"})
 
-	repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
-	assert.NoError(t, err)
+	if useExternalIndexer {
+		// Restored on return so the patterns do not leak into later tests.
+		defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("**.txt"))()
+		defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("**/y/**"))()
 
-	executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
+		repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
+		assert.NoError(t, err)
 
-	testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
-	testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"})
-	testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
-	testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
+		executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
+
+		testSearch(t, "/user2/glob/search?q=loren&page=1", gitReference, []string{"a.txt"})
+		testSearch(t, "/user2/glob/search?q=file3&page=1", gitReference, []string{"x/b.txt"})
+		testSearch(t, "/user2/glob/search?q=file4&page=1", gitReference, []string{})
+		testSearch(t, "/user2/glob/search?q=file5&page=1", gitReference, []string{})
+	}
 }
 
-func testSearch(t *testing.T, url string, expected []string) {
+// TestIndexerSearchRepo covers repository search backed by the code indexer.
+func TestIndexerSearchRepo(t *testing.T) {
+	testSearchRepo(t, true)
+}
+
+// TestNoIndexerSearchRepo covers the git grep fallback that is used when the
+// code indexer is disabled.
+func TestNoIndexerSearchRepo(t *testing.T) {
+	testSearchRepo(t, false)
+}
+
+// testSearch requests url and checks that the file names of the results equal
+// expected and that every result link contains gitRef.
+func testSearch(t *testing.T, url, gitRef string, expected []string) {
 	req := NewRequest(t, "GET", url)
 	resp := MakeRequest(t, req, http.StatusOK)
 
-	filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
+	doc := NewHTMLParser(t, resp.Body).doc.
+		Find(".repository.search").
+		Find(".repo-search-result")
+
+	filenames := resultFilenames(t, doc)
 	assert.EqualValues(t, expected, filenames)
+
+	checkResultLinks(t, gitRef, doc)
 }
 
 func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {