From a35a5b225c6f49a91459ae3e9e97d0ad6a7c16bd Mon Sep 17 00:00:00 2001
From: KN4CK3R <KN4CK3R@users.noreply.github.com>
Date: Mon, 12 Apr 2021 16:49:26 +0200
Subject: [PATCH] Add ETag header (#15370)

* Add ETag header.

* Comply with RFC 7232.

* Moved logic into httpcache.go

* Changed name.

* Lint

* Implemented If-None-Match list.

* Fixed missing header on *

* Removed weak etag support.

* Removed * support.

* Added unit test.

* Lint

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
---
 modules/httpcache/httpcache.go      |  41 ++++++--
 modules/httpcache/httpcache_test.go | 144 ++++++++++++++++++++++++++++
 modules/public/public.go            |   2 +-
 routers/repo/attachment.go          |  20 ++--
 routers/repo/download.go            |  13 +++
 5 files changed, 200 insertions(+), 20 deletions(-)
 create mode 100644 modules/httpcache/httpcache_test.go

diff --git a/modules/httpcache/httpcache.go b/modules/httpcache/httpcache.go
index cf35cef129..f5e3906be6 100644
--- a/modules/httpcache/httpcache.go
+++ b/modules/httpcache/httpcache.go
@@ -10,6 +10,7 @@ import (
 	"net/http"
 	"os"
 	"strconv"
+	"strings"
 	"time"
 
 	"code.gitea.io/gitea/modules/setting"
@@ -26,11 +27,13 @@ func GetCacheControl() string {
 // generateETag generates an ETag based on size, filename and file modification time
 func generateETag(fi os.FileInfo) string {
 	etag := fmt.Sprint(fi.Size()) + fi.Name() + fi.ModTime().UTC().Format(http.TimeFormat)
-	return base64.StdEncoding.EncodeToString([]byte(etag))
+	return `"` + base64.StdEncoding.EncodeToString([]byte(etag)) + `"`
 }
 
 // HandleTimeCache handles time-based caching for a HTTP request
 func HandleTimeCache(req *http.Request, w http.ResponseWriter, fi os.FileInfo) (handled bool) {
+	w.Header().Set("Cache-Control", GetCacheControl())
+
 	ifModifiedSince := req.Header.Get("If-Modified-Since")
 	if ifModifiedSince != "" {
 		t, err := time.Parse(http.TimeFormat, ifModifiedSince)
@@ -40,20 +43,40 @@ func HandleTimeCache(req *http.Request, w http.ResponseWriter, fi os.FileInfo) (
 		}
 	}
 
-	w.Header().Set("Cache-Control", GetCacheControl())
 	w.Header().Set("Last-Modified", fi.ModTime().Format(http.TimeFormat))
 	return false
 }
 
-// HandleEtagCache handles ETag-based caching for a HTTP request
-func HandleEtagCache(req *http.Request, w http.ResponseWriter, fi os.FileInfo) (handled bool) {
+// HandleFileETagCache handles ETag-based caching for a HTTP request
+func HandleFileETagCache(req *http.Request, w http.ResponseWriter, fi os.FileInfo) (handled bool) {
 	etag := generateETag(fi)
-	if req.Header.Get("If-None-Match") == etag {
-		w.WriteHeader(http.StatusNotModified)
-		return true
-	}
+	return HandleGenericETagCache(req, w, etag)
+}
 
+// HandleGenericETagCache handles ETag-based caching for a HTTP request.
+// It returns true if the request was handled.
+func HandleGenericETagCache(req *http.Request, w http.ResponseWriter, etag string) (handled bool) {
+	if len(etag) > 0 {
+		w.Header().Set("Etag", etag)
+		if checkIfNoneMatchIsValid(req, etag) {
+			w.WriteHeader(http.StatusNotModified)
+			return true
+		}
+	}
 	w.Header().Set("Cache-Control", GetCacheControl())
-	w.Header().Set("ETag", etag)
+	return false
+}
+
+// checkIfNoneMatchIsValid tests if the header If-None-Match matches the ETag
+func checkIfNoneMatchIsValid(req *http.Request, etag string) bool {
+	ifNoneMatch := req.Header.Get("If-None-Match")
+	if len(ifNoneMatch) > 0 {
+		for _, item := range strings.Split(ifNoneMatch, ",") {
+			item = strings.TrimSpace(item)
+			if item == etag {
+				return true
+			}
+		}
+	}
 	return false
 }
diff --git a/modules/httpcache/httpcache_test.go b/modules/httpcache/httpcache_test.go
new file mode 100644
index 0000000000..fe5ca17956
--- /dev/null
+++ b/modules/httpcache/httpcache_test.go
@@ -0,0 +1,144 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package httpcache
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+type mockFileInfo struct {
+}
+
+func (m mockFileInfo) Name() string       { return "gitea.test" }
+func (m mockFileInfo) Size() int64        { return int64(10) }
+func (m mockFileInfo) Mode() os.FileMode  { return os.ModePerm }
+func (m mockFileInfo) ModTime() time.Time { return time.Time{} }
+func (m mockFileInfo) IsDir() bool        { return false }
+func (m mockFileInfo) Sys() interface{}   { return nil }
+
+func TestHandleFileETagCache(t *testing.T) {
+	fi := mockFileInfo{}
+	etag := `"MTBnaXRlYS50ZXN0TW9uLCAwMSBKYW4gMDAwMSAwMDowMDowMCBHTVQ="`
+
+	t.Run("No_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		handled := HandleFileETagCache(req, w, fi)
+
+		assert.False(t, handled)
+		assert.Len(t, w.Header(), 2)
+		assert.Contains(t, w.Header(), "Cache-Control")
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+	})
+	t.Run("Wrong_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		req.Header.Set("If-None-Match", `"wrong etag"`)
+
+		handled := HandleFileETagCache(req, w, fi)
+
+		assert.False(t, handled)
+		assert.Len(t, w.Header(), 2)
+		assert.Contains(t, w.Header(), "Cache-Control")
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+	})
+	t.Run("Correct_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		req.Header.Set("If-None-Match", etag)
+
+		handled := HandleFileETagCache(req, w, fi)
+
+		assert.True(t, handled)
+		assert.Len(t, w.Header(), 1)
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+		assert.Equal(t, http.StatusNotModified, w.Code)
+	})
+}
+
+func TestHandleGenericETagCache(t *testing.T) {
+	etag := `"test"`
+
+	t.Run("No_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		handled := HandleGenericETagCache(req, w, etag)
+
+		assert.False(t, handled)
+		assert.Len(t, w.Header(), 2)
+		assert.Contains(t, w.Header(), "Cache-Control")
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+	})
+	t.Run("Wrong_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		req.Header.Set("If-None-Match", `"wrong etag"`)
+
+		handled := HandleGenericETagCache(req, w, etag)
+
+		assert.False(t, handled)
+		assert.Len(t, w.Header(), 2)
+		assert.Contains(t, w.Header(), "Cache-Control")
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+	})
+	t.Run("Correct_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		req.Header.Set("If-None-Match", etag)
+
+		handled := HandleGenericETagCache(req, w, etag)
+
+		assert.True(t, handled)
+		assert.Len(t, w.Header(), 1)
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+		assert.Equal(t, http.StatusNotModified, w.Code)
+	})
+	t.Run("Multiple_Wrong_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		req.Header.Set("If-None-Match", `"wrong etag", "wrong etag "`)
+
+		handled := HandleGenericETagCache(req, w, etag)
+
+		assert.False(t, handled)
+		assert.Len(t, w.Header(), 2)
+		assert.Contains(t, w.Header(), "Cache-Control")
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+	})
+	t.Run("Multiple_Correct_If-None-Match", func(t *testing.T) {
+		req := &http.Request{Header: make(http.Header)}
+		w := httptest.NewRecorder()
+
+		req.Header.Set("If-None-Match", `"wrong etag", `+etag)
+
+		handled := HandleGenericETagCache(req, w, etag)
+
+		assert.True(t, handled)
+		assert.Len(t, w.Header(), 1)
+		assert.Contains(t, w.Header(), "Etag")
+		assert.Equal(t, etag, w.Header().Get("Etag"))
+		assert.Equal(t, http.StatusNotModified, w.Code)
+	})
+}
diff --git a/modules/public/public.go b/modules/public/public.go
index ee3d2cf75f..14525cfa0e 100644
--- a/modules/public/public.go
+++ b/modules/public/public.go
@@ -165,7 +165,7 @@ func (opts *Options) handle(w http.ResponseWriter, req *http.Request, opt *Optio
 		log.Println("[Static] Serving " + file)
 	}
 
-	if httpcache.HandleEtagCache(req, w, fi) {
+	if httpcache.HandleFileETagCache(req, w, fi) {
 		return true
 	}
 
diff --git a/routers/repo/attachment.go b/routers/repo/attachment.go
index a896e4a501..f53e7450ae 100644
--- a/routers/repo/attachment.go
+++ b/routers/repo/attachment.go
@@ -10,6 +10,7 @@ import (
 
 	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/modules/context"
+	"code.gitea.io/gitea/modules/httpcache"
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/setting"
 	"code.gitea.io/gitea/modules/storage"
@@ -124,21 +125,25 @@ func GetAttachment(ctx *context.Context) {
 		}
 	}
 
+	if err := attach.IncreaseDownloadCount(); err != nil {
+		ctx.ServerError("IncreaseDownloadCount", err)
+		return
+	}
+
 	if setting.Attachment.ServeDirect {
 		//If we have a signed url (S3, object storage), redirect to this directly.
 		u, err := storage.Attachments.URL(attach.RelativePath(), attach.Name)
 
 		if u != nil && err == nil {
-			if err := attach.IncreaseDownloadCount(); err != nil {
-				ctx.ServerError("Update", err)
-				return
-			}
-
 			ctx.Redirect(u.String())
 			return
 		}
 	}
 
+	if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+attach.UUID+`"`) {
+		return
+	}
+
 	//If we have matched and access to release or issue
 	fr, err := storage.Attachments.Open(attach.RelativePath())
 	if err != nil {
@@ -147,11 +152,6 @@ func GetAttachment(ctx *context.Context) {
 	}
 	defer fr.Close()
 
-	if err := attach.IncreaseDownloadCount(); err != nil {
-		ctx.ServerError("Update", err)
-		return
-	}
-
 	if err = ServeData(ctx, attach.Name, attach.Size, fr); err != nil {
 		ctx.ServerError("ServeData", err)
 		return
diff --git a/routers/repo/download.go b/routers/repo/download.go
index 63a9ca47d7..1eedec8cb1 100644
--- a/routers/repo/download.go
+++ b/routers/repo/download.go
@@ -15,6 +15,7 @@ import (
 	"code.gitea.io/gitea/modules/charset"
 	"code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/httpcache"
 	"code.gitea.io/gitea/modules/lfs"
 	"code.gitea.io/gitea/modules/log"
 )
@@ -31,6 +32,7 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader)
 	}
 
 	ctx.Resp.Header().Set("Cache-Control", "public,max-age=86400")
+
 	if size >= 0 {
 		ctx.Resp.Header().Set("Content-Length", fmt.Sprintf("%d", size))
 	} else {
@@ -71,6 +73,10 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader)
 
 // ServeBlob download a git.Blob
 func ServeBlob(ctx *context.Context, blob *git.Blob) error {
+	if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`) {
+		return nil
+	}
+
 	dataRc, err := blob.DataAsync()
 	if err != nil {
 		return err
@@ -86,6 +92,10 @@ func ServeBlob(ctx *context.Context, blob *git.Blob) error {
 
 // ServeBlobOrLFS download a git.Blob redirecting to LFS if necessary
 func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error {
+	if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`) {
+		return nil
+	}
+
 	dataRc, err := blob.DataAsync()
 	if err != nil {
 		return err
@@ -102,6 +112,9 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error {
 		if meta == nil {
 			return ServeBlob(ctx, blob)
 		}
+		if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) {
+			return nil
+		}
 		lfsDataRc, err := lfs.ReadMetaObject(meta.Pointer)
 		if err != nil {
 			return err