From e4bda0e8457d00c01b83f153ed5a4a8ea4cf85c8 Mon Sep 17 00:00:00 2001 From: Gusted <postmaster@gusted.xyz> Date: Fri, 18 Aug 2023 04:39:23 +0200 Subject: [PATCH] [GITEA] Add slow SQL query warning - Databases are one of the most important parts of Forgejo, every interaction with Forgejo uses the database in one way or another. Therefore, it is important to maintain the database and recognize when Forgejo is not doing well with the database. Forgejo already has the option to log *every* SQL query along with its execution time, but monitoring becomes impractical for larger instances and takes up unnecessary storage in the logs. - Add a QoL enhancement that allows instance administrators to specify a threshold value beyond which query execution time is logged as a warning in the xorm logger. The default value is a conservative five seconds to avoid this becoming a source of spam in the logs. - The use case for this patch is that with an instance the size of Codeberg, monitoring SQL logs is not very fruitful and most of them are uninteresting. Recently, in the context of persistent deadlock issues (https://codeberg.org/forgejo/forgejo/issues/220), I have noticed that certain queries hold locks on tables like comment and issue for several seconds. This patch helps to identify which queries these are and when they happen. - Added unit test. (cherry picked from commit 24bbe7886fb4cb9a38c8dab8c44f4c9cbfa25481) (cherry picked from commit 6e29145b3c1455498531593d38e6a914941a12cb) (cherry picked from commit 63731e30712872bd2395eb3cf36d9996e5793645) (cherry picked from commit 3ce1a097369c132654de70df707b867e47bd1c40) (cherry picked from commit a64426907de788cc0937a7a2b16af4d2f26f7fe6) (cherry picked from commit 4b1921569156445c58d9889602733da5934c7b95) (cherry picked from commit e6356744359fa947c049827d60c2ea0e277e03dc) (cherry picked from commit 9cf501f1af4cd870221cef6af489618785b71186) (cherry picked from commit 0d6b934eba1c0e9b27b364791113aae816b6b366) (cherry picked from commit 4b6c2738795002887844a106f2fed2ef1673eed1) (cherry picked from commit 89b1315338b0c7a726a36a84e9844013a13560b8) (cherry picked from commit edd8e66ce991c395bb0af7720631c3cd26caaa51) [GITEA] Add slow SQL query warning (squash) document the setting (cherry picked from commit ce38599c5141c7fc6bc054819f5ff1c1b45bda1f) (cherry picked from commit 794aa67c68c8e24ac7301eb7ef767c6e2499a78d) (cherry picked from commit a4c2c6b004c21488e90f637ca7920f49108ed75d) (cherry picked from commit 97912752bc802db79bb26a6591aec885aea30ee4) (cherry picked from commit 00b5327c9750215a290238516e7b6fb1e6601e14) (cherry picked from commit 1069c860e78c11225b4d74ff3044df7786562821) (cherry picked from commit 84241f42c83852918b57c8bd25364697037fe42f) --- custom/conf/app.example.ini | 4 ++ .../config-cheat-sheet.en-us.md | 1 + models/db/engine.go | 28 ++++++++++++++ models/db/engine_test.go | 38 +++++++++++++++++++ modules/setting/database.go | 2 + 5 files changed, 73 insertions(+) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index a92181d12f..263d08a2f9 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -410,6 +410,10 @@ USER = root ;; ;; Whether execute database models migrations automatically ;AUTO_MIGRATION = true +;; +;; Threshold value (in seconds) beyond which query execution time is logged as a warning in the xorm logger +;; +;SLOW_QUERY_TRESHOLD = 5s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index c9e6a937c3..99b0be969a 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -456,6 +456,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a - `MAX_IDLE_CONNS` **2**: Max idle database connections on connection pool, default is 2 - this will be capped to `MAX_OPEN_CONNS`. - `CONN_MAX_LIFETIME` **0 or 3s**: Sets the maximum amount of time a DB connection may be reused - default is 0, meaning there is no limit (except on MySQL where it is 3s - see #6804 & #7071). - `AUTO_MIGRATION` **true**: Whether execute database models migrations automatically. +- `SLOW_QUERY_TRESHOLD` **5s**: Threshold value in seconds beyond which query execution time is logged as a warning in the xorm logger. [^1]: It may be necessary to specify a hostport even when listening on a unix socket, as the port is part of the socket name. see [#24552](https://github.com/go-gitea/gitea/issues/24552#issuecomment-1681649367) for additional details. diff --git a/models/db/engine.go b/models/db/engine.go index b5a41f93e3..0fcd4849bf 100755 --- a/models/db/engine.go +++ b/models/db/engine.go @@ -11,10 +11,13 @@ import ( "io" "reflect" "strings" + "time" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "xorm.io/xorm" + "xorm.io/xorm/contexts" "xorm.io/xorm/names" "xorm.io/xorm/schemas" @@ -147,6 +150,13 @@ func InitEngine(ctx context.Context) error { xormEngine.SetConnMaxLifetime(setting.Database.ConnMaxLifetime) xormEngine.SetDefaultContext(ctx) + if setting.Database.SlowQueryTreshold > 0 { + xormEngine.AddHook(&SlowQueryHook{ + Treshold: setting.Database.SlowQueryTreshold, + Logger: log.GetLogger("xorm"), + }) + } + SetDefaultEngine(ctx, xormEngine) return nil } @@ -300,3 +310,21 @@ func SetLogSQL(ctx context.Context, on bool) { sess.Engine().ShowSQL(on) } } + +type SlowQueryHook struct { + Treshold time.Duration + Logger log.Logger +} + +var _ contexts.Hook = &SlowQueryHook{} + +func (SlowQueryHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) { + return c.Ctx, nil +} + +func (h *SlowQueryHook) AfterProcess(c *contexts.ContextHook) error { + if c.ExecuteTime >= h.Treshold { + h.Logger.Log(8, log.WARN, "[Slow SQL Query] %s %v - %v", c.SQL, c.Args, c.ExecuteTime) + } + return nil +} diff --git a/models/db/engine_test.go b/models/db/engine_test.go index c9ae5f1542..ba922821b0 100644 --- a/models/db/engine_test.go +++ b/models/db/engine_test.go @@ -6,15 +6,19 @@ package db_test import ( "path/filepath" "testing" + "time" "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" _ "code.gitea.io/gitea/cmd" // for TestPrimaryKeys "github.com/stretchr/testify/assert" + "xorm.io/xorm" ) func TestDumpDatabase(t *testing.T) { @@ -85,3 +89,37 @@ func TestPrimaryKeys(t *testing.T) { } } } + +func TestSlowQuery(t *testing.T) { + lc, cleanup := test.NewLogChecker("slow-query") + lc.StopMark("[Slow SQL Query]") + defer cleanup() + + e := db.GetEngine(db.DefaultContext) + engine, ok := e.(*xorm.Engine) + assert.True(t, ok) + + // It's not possible to clean this up with XORM, but it's luckily not harmful + // to leave around. + engine.AddHook(&db.SlowQueryHook{ + Treshold: time.Second * 10, + Logger: log.GetLogger("slow-query"), + }) + + // NOOP query. + e.Exec("SELECT 1 WHERE false;") + + _, stopped := lc.Check(100 * time.Millisecond) + assert.False(t, stopped) + + engine.AddHook(&db.SlowQueryHook{ + Treshold: 0, // Every query should be logged. + Logger: log.GetLogger("slow-query"), + }) + + // NOOP query. + e.Exec("SELECT 1 WHERE false;") + + _, stopped = lc.Check(100 * time.Millisecond) + assert.True(t, stopped) +} diff --git a/modules/setting/database.go b/modules/setting/database.go index 761e767e8f..62681bb45c 100644 --- a/modules/setting/database.go +++ b/modules/setting/database.go @@ -45,6 +45,7 @@ var ( ConnMaxLifetime time.Duration IterateBufferSize int AutoMigration bool + SlowQueryTreshold time.Duration }{ Timeout: 500, IterateBufferSize: 50, @@ -87,6 +88,7 @@ func loadDBSetting(rootCfg ConfigProvider) { Database.DBConnectRetries = sec.Key("DB_RETRIES").MustInt(10) Database.DBConnectBackoff = sec.Key("DB_RETRY_BACKOFF").MustDuration(3 * time.Second) Database.AutoMigration = sec.Key("AUTO_MIGRATION").MustBool(true) + Database.SlowQueryTreshold = sec.Key("SLOW_QUERY_TRESHOLD").MustDuration(5 * time.Second) } // DBConnStr returns database connection string