forgejo/modules/references/references.go
Adam Majer d68a613ba8
Add support for sha256 repositories ()
Currently only SHA1 repositories are supported by Gitea. This adds
support for alternate SHA256 with the additional aim of easier support
for additional hash types in the future.

Fixes: 
Limited by: https://github.com/go-git/go-git/issues/899
Depend on: 

<img width="776" alt="图片" src="https://github.com/go-gitea/gitea/assets/81045/5448c9a7-608e-4341-a149-5dd0069c9447">

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: 6543 <6543@obermui.de>
2024-01-19 17:05:02 +01:00

595 lines
19 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package references
import (
"bytes"
"net/url"
"regexp"
"strconv"
"strings"
"sync"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup/mdstripper"
"code.gitea.io/gitea/modules/setting"
"github.com/yuin/goldmark/util"
)
var (
// validNamePattern performs only the most basic validation for user or repository names
// Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters.
validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)
// NOTE: All below regex matching do not perform any extra validation.
// Thus a link is produced even if the linked entity does not exist.
// While fast, this is also incorrect and lead to false positives.
// TODO: fix invalid linking issue
// mentionPattern matches all mentions in the form of "@user" or "@org/team"
mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
// issueNumericPattern matches string that references to a numeric issue, e.g. #1287
issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\')([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
// issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234
issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
// crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository
// e.g. org/repo#12345
crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
// crossReferenceCommitPattern matches a string that references a commit in a different repository
// e.g. go-gitea/gitea@d8a994ef, go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae (7-40 characters)
crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,64})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
// spaceTrimmedPattern let's find the trailing space
spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)
// timeLogPattern matches string for time tracking
timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
issueKeywordsOnce sync.Once
giteaHostInit sync.Once
giteaHost string
giteaIssuePullPattern *regexp.Regexp
actionStrings = []string{
"none",
"closes",
"reopens",
"neutered",
}
)
// XRefAction represents the kind of effect a cross reference has once is resolved
type XRefAction int64
const (
// XRefActionNone means the cross-reference is simply a comment
XRefActionNone XRefAction = iota // 0
// XRefActionCloses means the cross-reference should close an issue if it is resolved
XRefActionCloses // 1
// XRefActionReopens means the cross-reference should reopen an issue if it is resolved
XRefActionReopens // 2
// XRefActionNeutered means the cross-reference will no longer affect the source
XRefActionNeutered // 3
)
func (a XRefAction) String() string {
return actionStrings[a]
}
// IssueReference contains an unverified cross-reference to a local issue or pull request
type IssueReference struct {
Index int64
Owner string
Name string
Action XRefAction
TimeLog string
}
// RenderizableReference contains an unverified cross-reference to with rendering information
// The IsPull member means that a `!num` reference was used instead of `#num`.
// This kind of reference is used to make pulls available when an external issue tracker
// is used. Otherwise, `#` and `!` are completely interchangeable.
type RenderizableReference struct {
Issue string
Owner string
Name string
CommitSha string
IsPull bool
RefLocation *RefSpan
Action XRefAction
ActionLocation *RefSpan
}
type rawReference struct {
index int64
owner string
name string
isPull bool
action XRefAction
issue string
refLocation *RefSpan
actionLocation *RefSpan
timeLog string
}
func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
refarr := make([]IssueReference, len(reflist))
for i, r := range reflist {
refarr[i] = IssueReference{
Index: r.index,
Owner: r.owner,
Name: r.name,
Action: r.action,
TimeLog: r.timeLog,
}
}
return refarr
}
// RefSpan is the position where the reference was found within the parsed text
type RefSpan struct {
Start int
End int
}
func makeKeywordsPat(words []string) *regexp.Regexp {
acceptedWords := parseKeywords(words)
if len(acceptedWords) == 0 {
// Never match
return nil
}
return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
}
func parseKeywords(words []string) []string {
acceptedWords := make([]string, 0, 5)
wordPat := regexp.MustCompile(`^[\pL]+$`)
for _, word := range words {
word = strings.ToLower(strings.TrimSpace(word))
// Accept Unicode letter class runes (a-z, á, à, ä, )
if wordPat.MatchString(word) {
acceptedWords = append(acceptedWords, word)
} else {
log.Info("Invalid keyword: %s", word)
}
}
return acceptedWords
}
func newKeywords() {
issueKeywordsOnce.Do(func() {
// Delay initialization until after the settings module is initialized
doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
})
}
func doNewKeywords(close, reopen []string) {
issueCloseKeywordsPat = makeKeywordsPat(close)
issueReopenKeywordsPat = makeKeywordsPat(reopen)
}
// getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
func getGiteaHostName() string {
giteaHostInit.Do(func() {
if uapp, err := url.Parse(setting.AppURL); err == nil {
giteaHost = strings.ToLower(uapp.Host)
giteaIssuePullPattern = regexp.MustCompile(
`(\s|^|\(|\[)` +
regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +
`([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
`((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
} else {
giteaHost = ""
giteaIssuePullPattern = nil
}
})
return giteaHost
}
// getGiteaIssuePullPattern
func getGiteaIssuePullPattern() *regexp.Regexp {
getGiteaHostName()
return giteaIssuePullPattern
}
// FindAllMentionsMarkdown matches mention patterns in given content and
// returns a list of found unvalidated user names **not including** the @ prefix.
func FindAllMentionsMarkdown(content string) []string {
bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
locations := FindAllMentionsBytes(bcontent)
mentions := make([]string, len(locations))
for i, val := range locations {
mentions[i] = string(bcontent[val.Start+1 : val.End])
}
return mentions
}
// FindAllMentionsBytes matches mention patterns in given content
// and returns a list of locations for the unvalidated user names, including the @ prefix.
func FindAllMentionsBytes(content []byte) []RefSpan {
// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
// trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
// from the second reference will be "eaten" by the first one:
// ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`
ret := make([]RefSpan, 0, 5)
pos := 0
for {
match := mentionPattern.FindSubmatchIndex(content[pos:])
if match == nil {
break
}
ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
if notrail == nil {
pos = match[3] + pos
} else {
pos = match[3] + pos + notrail[1] - notrail[3]
}
}
return ret
}
// FindFirstMentionBytes matches the first mention in then given content
// and returns the location of the unvalidated user name, including the @ prefix.
func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
mention := mentionPattern.FindSubmatchIndex(content)
if mention == nil {
return false, RefSpan{}
}
return true, RefSpan{Start: mention[2], End: mention[3]}
}
// FindAllIssueReferencesMarkdown strips content from markdown markup
// and returns a list of unvalidated references found in it.
func FindAllIssueReferencesMarkdown(content string) []IssueReference {
return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
}
func findAllIssueReferencesMarkdown(content string) []*rawReference {
bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
return findAllIssueReferencesBytes(bcontent, links)
}
func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
// We will iterate through the content, rewrite and simplify full references.
//
// We want to transform something like:
//
// this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
// https://ourgitea.com/git/owner/repo/pulls/123456789
//
// Into something like:
//
// this is a #123456789, foo
// !123456789
pos := 0
for {
// re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
match := re.FindSubmatchIndex((*contentBytes)[pos:])
if match == nil {
break
}
// match is a bunch of indices into the content from pos onwards so
// to simplify things let's just add pos to all of the indices in match
for i := range match {
match[i] += pos
}
// match[0]-match[1] is whole string
// match[2]-match[3] is preamble
// move the position to the end of the preamble
pos = match[3]
// match[4]-match[5] is owner/repo
// now copy the owner/repo to end of the preamble
endPos := pos + match[5] - match[4]
copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])
// move the current position to the end of the newly copied owner/repo
pos = endPos
// Now set the issue/pull marker:
//
// match[6]-match[7] == 'issues'
(*contentBytes)[pos] = '#'
if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
(*contentBytes)[pos] = '!'
}
pos++
// Then add the issue/pull number
//
// match[8]-match[9] is the number
endPos = pos + match[9] - match[8]
copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])
// Now copy what's left at the end of the string to the new end position
copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
// now we reset the length
// our new section has length endPos - match[3]
// our old section has length match[9] - match[3]
*contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
pos = endPos
}
}
// FindAllIssueReferences returns a list of unvalidated references found in a string.
func FindAllIssueReferences(content string) []IssueReference {
// Need to convert fully qualified html references to local system to #/! short codes
contentBytes := []byte(content)
if re := getGiteaIssuePullPattern(); re != nil {
convertFullHTMLReferencesToShortRefs(re, &contentBytes)
} else {
log.Debug("No GiteaIssuePullPattern pattern")
}
return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
}
// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
func FindRenderizableReferenceNumeric(content string, prOnly, crossLinkOnly bool) (bool, *RenderizableReference) {
var match []int
if !crossLinkOnly {
match = issueNumericPattern.FindStringSubmatchIndex(content)
}
if match == nil {
if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
return false, nil
}
}
r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly)
if r == nil {
return false, nil
}
return true, &RenderizableReference{
Issue: r.issue,
Owner: r.owner,
Name: r.name,
IsPull: r.isPull,
RefLocation: r.refLocation,
Action: r.action,
ActionLocation: r.actionLocation,
}
}
// FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference found in a string.
func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) {
m := crossReferenceCommitPattern.FindStringSubmatchIndex(content)
if len(m) < 8 {
return false, nil
}
return true, &RenderizableReference{
Owner: content[m[2]:m[3]],
Name: content[m[4]:m[5]],
CommitSha: content[m[6]:m[7]],
RefLocation: &RefSpan{Start: m[2], End: m[7]},
}
}
// FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string.
func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {
match := pattern.FindStringSubmatchIndex(content)
if len(match) < 4 {
return false, nil
}
action, location := findActionKeywords([]byte(content), match[2])
return true, &RenderizableReference{
Issue: content[match[2]:match[3]],
RefLocation: &RefSpan{Start: match[0], End: match[1]},
Action: action,
ActionLocation: location,
IsPull: false,
}
}
// FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
if match == nil {
return false, nil
}
action, location := findActionKeywords([]byte(content), match[2])
return true, &RenderizableReference{
Issue: content[match[2]:match[3]],
RefLocation: &RefSpan{Start: match[2], End: match[3]},
Action: action,
ActionLocation: location,
IsPull: false,
}
}
// FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
ret := make([]*rawReference, 0, 10)
pos := 0
// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
// trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
// from the second reference will be "eaten" by the first one:
// ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`
for {
match := issueNumericPattern.FindSubmatchIndex(content[pos:])
if match == nil {
break
}
if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
ret = append(ret, ref)
}
notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
if notrail == nil {
pos = match[3] + pos
} else {
pos = match[3] + pos + notrail[1] - notrail[3]
}
}
pos = 0
for {
match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
if match == nil {
break
}
if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
ret = append(ret, ref)
}
notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
if notrail == nil {
pos = match[3] + pos
} else {
pos = match[3] + pos + notrail[1] - notrail[3]
}
}
localhost := getGiteaHostName()
for _, link := range links {
if u, err := url.Parse(link); err == nil {
// Note: we're not attempting to match the URL scheme (http/https)
host := strings.ToLower(u.Host)
if host != "" && host != localhost {
continue
}
parts := strings.Split(u.EscapedPath(), "/")
// /user/repo/issues/3
if len(parts) != 5 || parts[0] != "" {
continue
}
var sep string
if parts[3] == "issues" {
sep = "#"
} else if parts[3] == "pulls" {
sep = "!"
} else {
continue
}
// Note: closing/reopening keywords not supported with URLs
bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
ref.refLocation = nil
ret = append(ret, ref)
}
}
}
if len(ret) == 0 {
return ret
}
pos = 0
for {
match := timeLogPattern.FindSubmatchIndex(content[pos:])
if match == nil {
break
}
timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])
var f *rawReference
for _, ref := range ret {
if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {
f = ref
}
}
pos = match[1] + pos
if f == nil {
f = ret[0]
}
if len(f.timeLog) == 0 {
f.timeLog = timeLogEntry
}
}
return ret
}
func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {
sep := bytes.IndexAny(content[start:end], "#!")
if sep < 0 {
return nil
}
isPull := content[start+sep] == '!'
if prOnly && !isPull {
return nil
}
repo := string(content[start : start+sep])
issue := string(content[start+sep+1 : end])
index, err := strconv.ParseInt(issue, 10, 64)
if err != nil {
return nil
}
if repo == "" {
if fromLink {
// Markdown links must specify owner/repo
return nil
}
action, location := findActionKeywords(content, start)
return &rawReference{
index: index,
action: action,
issue: issue,
isPull: isPull,
refLocation: &RefSpan{Start: start, End: end},
actionLocation: location,
}
}
parts := strings.Split(strings.ToLower(repo), "/")
if len(parts) != 2 {
return nil
}
owner, name := parts[0], parts[1]
if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
return nil
}
action, location := findActionKeywords(content, start)
return &rawReference{
index: index,
owner: owner,
name: name,
action: action,
issue: issue,
isPull: isPull,
refLocation: &RefSpan{Start: start, End: end},
actionLocation: location,
}
}
func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
newKeywords()
var m []int
if issueCloseKeywordsPat != nil {
m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
if m != nil {
return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
}
}
if issueReopenKeywordsPat != nil {
m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
if m != nil {
return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
}
}
return XRefActionNone, nil
}
// IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {
if extTracker {
// External issues cannot be automatically closed
return false
}
return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
}