From 75316348c50d03d9aed760e9b603275995e4e3e3 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 26 Jun 2018 18:24:42 +0200 Subject: [PATCH] Prune web hook logs older than 90 days This adds a recurring Sidekiq job that removes up to 50 000 old web hook logs per hour, if they are older than 90 days. This will prevent the web_hook_logs table from growing indefinitely. Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/46120 --- app/workers/all_queues.yml | 1 + app/workers/prune_web_hook_logs_worker.rb | 26 +++++++++++++++++++ changelogs/unreleased/prune-web-hook-logs.yml | 5 ++++ config/initializers/1_settings.rb | 4 +++ doc/user/project/integrations/webhooks.md | 16 +++++++----- .../prune_web_hook_logs_worker_spec.rb | 22 ++++++++++++++++ 6 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 app/workers/prune_web_hook_logs_worker.rb create mode 100644 changelogs/unreleased/prune-web-hook-logs.yml create mode 100644 spec/workers/prune_web_hook_logs_worker_spec.rb diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index d06f51b1828..b8b854853b7 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -20,6 +20,7 @@ - cronjob:ci_archive_traces_cron - cronjob:trending_projects - cronjob:issue_due_scheduler +- cronjob:prune_web_hook_logs - gcp_cluster:cluster_install_app - gcp_cluster:cluster_provision diff --git a/app/workers/prune_web_hook_logs_worker.rb b/app/workers/prune_web_hook_logs_worker.rb new file mode 100644 index 00000000000..45c7d32f7eb --- /dev/null +++ b/app/workers/prune_web_hook_logs_worker.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +# Worker that deletes a fixed number of outdated rows from the "web_hook_logs" +# table. +class PruneWebHookLogsWorker + include ApplicationWorker + include CronjobQueue + + # The maximum number of rows to remove in a single job. + DELETE_LIMIT = 50_000 + + def perform + # MySQL doesn't allow "DELETE FROM ... WHERE id IN ( ... )" if the inner + # query refers to the same table. To work around this we wrap the IN body in + # another sub query. + WebHookLog + .where( + 'id IN (SELECT id FROM (?) ids_to_remove)', + WebHookLog + .select(:id) + .where('created_at < ?', 90.days.ago.beginning_of_day) + .limit(DELETE_LIMIT) + ) + .delete_all + end +end diff --git a/changelogs/unreleased/prune-web-hook-logs.yml b/changelogs/unreleased/prune-web-hook-logs.yml new file mode 100644 index 00000000000..e8c805b2a92 --- /dev/null +++ b/changelogs/unreleased/prune-web-hook-logs.yml @@ -0,0 +1,5 @@ +--- +title: Prune web hook logs older than 90 days +merge_request: +author: +type: added diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 550647ae1c6..693a2934a1b 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -338,6 +338,10 @@ Settings.cron_jobs['issue_due_scheduler_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['issue_due_scheduler_worker']['cron'] ||= '50 00 * * *' Settings.cron_jobs['issue_due_scheduler_worker']['job_class'] = 'IssueDueSchedulerWorker' +Settings.cron_jobs['prune_web_hook_logs_worker'] ||= Settingslogic.new({}) +Settings.cron_jobs['prune_web_hook_logs_worker']['cron'] ||= '0 */1 * * *' +Settings.cron_jobs['prune_web_hook_logs_worker']['job_class'] = 'PruneWebHookLogsWorker' + # # Sidekiq # diff --git a/doc/user/project/integrations/webhooks.md b/doc/user/project/integrations/webhooks.md index 19df78f4140..8c09927e2df 100644 --- a/doc/user/project/integrations/webhooks.md +++ b/doc/user/project/integrations/webhooks.md @@ -6,6 +6,10 @@ Starting from GitLab 8.5: - the `project.ssh_url` key is deprecated in favor of the `project.git_ssh_url` key - the `project.http_url` key is deprecated in favor of the `project.git_http_url` key +>**Note:** +Starting from GitLab 11.1, the logs of web hooks are automatically removed after +one month. + Project webhooks allow you to trigger a URL if for example new code is pushed or a new issue is created. You can configure webhooks to listen for specific events like pushes, issues or merge requests. GitLab will send a POST request with data @@ -54,11 +58,11 @@ Below are described the supported events. Triggered when you push to the repository except when pushing tags. -> **Note:** When more than 20 commits are pushed at once, the `commits` web hook - attribute will only contain the first 20 for performance reasons. Loading - detailed commit data is expensive. Note that despite only 20 commits being +> **Note:** When more than 20 commits are pushed at once, the `commits` web hook + attribute will only contain the first 20 for performance reasons. Loading + detailed commit data is expensive. Note that despite only 20 commits being present in the `commits` attribute, the `total_commits_count` attribute will - contain the actual total. + contain the actual total. **Request header**: @@ -1149,11 +1153,11 @@ From this page, you can repeat delivery with the same data by clicking `Resend R When GitLab sends a webhook it expects a response in 10 seconds (set default value). If it does not receive one, it'll retry the webhook. If the endpoint doesn't send its HTTP response within those 10 seconds, GitLab may decide the hook failed and retry it. -If you are receiving multiple requests, you can try increasing the default value to wait for the HTTP response after sending the webhook +If you are receiving multiple requests, you can try increasing the default value to wait for the HTTP response after sending the webhook by uncommenting or adding the following setting to your `/etc/gitlab/gitlab.rb`: ``` -gitlab_rails['webhook_timeout'] = 10 +gitlab_rails['webhook_timeout'] = 10 ``` ## Example webhook receiver diff --git a/spec/workers/prune_web_hook_logs_worker_spec.rb b/spec/workers/prune_web_hook_logs_worker_spec.rb new file mode 100644 index 00000000000..d7d64a1f641 --- /dev/null +++ b/spec/workers/prune_web_hook_logs_worker_spec.rb @@ -0,0 +1,22 @@ +require 'spec_helper' + +describe PruneWebHookLogsWorker do + describe '#perform' do + before do + hook = create(:project_hook) + + 5.times do + create(:web_hook_log, web_hook: hook, created_at: 5.months.ago) + end + + create(:web_hook_log, web_hook: hook, response_status: '404') + end + + it 'removes all web hook logs older than one month' do + described_class.new.perform + + expect(WebHookLog.count).to eq(1) + expect(WebHookLog.first.response_status).to eq('404') + end + end +end